Index: CODE_OWNERS.TXT
===================================================================
--- CODE_OWNERS.TXT
+++ CODE_OWNERS.TXT
@@ -16,6 +16,11 @@
 E: resistor@mac.com
 D: SelectionDAG (lib/CodeGen/SelectionDAG/*)
 
+N: Matt Arsenault
+E: Matthew.Arsenault@amd.com
+I: arsenm
+D: HSAIL Target (lib/Target/HSAIL)
+
 N: Rafael Avila de Espindola
 E: rafael.espindola@gmail.com
 D: Gold plugin (tools/gold/*)
Index: docs/CompilerWriterInfo.rst
===================================================================
--- docs/CompilerWriterInfo.rst
+++ docs/CompilerWriterInfo.rst
@@ -80,6 +80,11 @@
 * `AMD GPU Programming Guide `_
 * `AMD Compute Resources `_
 
+HSAIL
+-----
+* `HSA Programmer Reference Manual Specification 1.0 `_
+* `HSA Standards `_
+
 SPARC
 -----
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -643,6 +643,7 @@
 include "llvm/IR/IntrinsicsAArch64.td"
 include "llvm/IR/IntrinsicsXCore.td"
 include "llvm/IR/IntrinsicsHexagon.td"
+include "llvm/IR/IntrinsicsHSAIL.td"
 include "llvm/IR/IntrinsicsNVVM.td"
 include "llvm/IR/IntrinsicsMips.td"
 include "llvm/IR/IntrinsicsAMDGPU.td"
Index: include/llvm/IR/IntrinsicsHSAIL.td
===================================================================
--- /dev/null
+++ include/llvm/IR/IntrinsicsHSAIL.td
@@ -0,0 +1,454 @@
+//===- IntrinsicsHSAIL.td - Defines HSAIL intrinsics -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the HSAIL-specific intrinsics.
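// --- Illustrative note (sketch, not part of the patch) ----------------------
// Most definitions below are overloaded on llvm_anyint_ty / llvm_anyfloat_ty,
// so a front end instantiates them for a concrete type through the intrinsic's
// overload arguments. A minimal C++ sketch, assuming the Intrinsic::hsail_*
// enum values that tablegen generates once this file is included from
// Intrinsics.td; emitSMulHi32 is a hypothetical helper name.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

static llvm::Value *emitSMulHi32(llvm::IRBuilder<> &B, llvm::Module &M,
                                 llvm::Value *LHS, llvm::Value *RHS) {
  // int_hsail_smulhi is overloaded on its result/operand type, so passing i32
  // as the overload type yields a declaration named llvm.hsail.smulhi.i32.
  llvm::Function *Fn = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::hsail_smulhi, {B.getInt32Ty()});
  return B.CreateCall(Fn, {LHS, RHS});
}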
+// +//===----------------------------------------------------------------------===// + +def llvm_hsail_group_ptr_ty : LLVMQualPointerType; +def llvm_hsail_flat_ptr_ty : LLVMQualPointerType; +def llvm_hsail_kernarg_ptr_ty : LLVMQualPointerType; + +let TargetPrefix = "hsail" in { + +class InstModIntrin_1Op : Intrinsic< + [llvm_anyfloat_ty], + [llvm_i1_ty, // ftz + llvm_i32_ty, // round + LLVMMatchType<0>], + [IntrNoMem] +>; + +class InstModIntrin_NoRound_1Op : Intrinsic< + [llvm_anyfloat_ty], + [llvm_i1_ty, // ftz + LLVMMatchType<0>], + [IntrNoMem] +>; + +class InstModIntrin_2Op : Intrinsic< + [llvm_anyfloat_ty], + [llvm_i1_ty, // ftz + llvm_i32_ty, // round + LLVMMatchType<0>, + LLVMMatchType<0>], + [IntrNoMem] +>; + +class InstModIntrin_NoRound_2Op : Intrinsic< + [llvm_anyfloat_ty], + [llvm_i1_ty, // ftz + LLVMMatchType<0>, + LLVMMatchType<0>], + [IntrNoMem] +>; + +class InstModIntrin_3Op : Intrinsic< + [llvm_anyfloat_ty], + [llvm_i1_ty, // ftz + llvm_i32_ty, // round + LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>], + [IntrNoMem] +>; + +// 5.2 Integer Arithmetic Instructions +def int_hsail_smulhi : Intrinsic< + [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem] +>; + +def int_hsail_umulhi : Intrinsic< + [llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem] +>; + +// 5.4 24-Bit Integer Optimization Instructions +def int_hsail_smad24 : GCCBuiltin<"__builtin_hsail_smad24">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_hsail_umad24 : GCCBuiltin<"__builtin_hsail_umad24">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_hsail_smad24hi : GCCBuiltin<"__builtin_hsail_smad24hi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_hsail_umad24hi : GCCBuiltin<"__builtin_hsail_umad24hi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_hsail_smul24 : GCCBuiltin<"__builtin_hsail_smul24">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_umul24 : GCCBuiltin<"__builtin_hsail_umul24">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_smul24hi : GCCBuiltin<"__builtin_hsail_smul24hi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_umul24hi : GCCBuiltin<"__builtin_hsail_umul24hi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +// 5.7 Bit String Instructions +def int_hsail_sbitextract : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_ubitextract : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_sbitinsert : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_ubitinsert : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_bitmask : Intrinsic<[llvm_anyint_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_bitrev : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>], + [IntrNoMem] +>; + +def int_hsail_bitselect : Intrinsic<[llvm_anyint_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem] +>; + +def int_hsail_sfirstbit : Intrinsic<[llvm_i32_ty], + [llvm_anyint_ty], + [IntrNoMem] +>; + +def int_hsail_ufirstbit : 
Intrinsic<[llvm_i32_ty], + [llvm_anyint_ty], + [IntrNoMem] +>; + +def int_hsail_lastbit : Intrinsic<[llvm_i32_ty], + [llvm_anyint_ty], + [IntrNoMem] +>; + + +// 5.11 Floating-Point Arithmetic Instructions + +def int_hsail_fadd : InstModIntrin_2Op; + +// llvm.ceil should be preferred +def int_hsail_fceil : InstModIntrin_NoRound_1Op; + +def int_hsail_fdiv : InstModIntrin_2Op; + +// llvm.floor should be preferred +def int_hsail_ffloor : InstModIntrin_NoRound_1Op; + +// llvm.fma should be preferred +def int_hsail_ffma : InstModIntrin_3Op; + +// llvm.fract should be preferred +def int_hsail_ffract : InstModIntrin_NoRound_1Op; + +// llvm.maxnum should be preferred +def int_hsail_fmax : InstModIntrin_NoRound_2Op; + +// llvm.minnum should be preferred +def int_hsail_fmin : InstModIntrin_NoRound_2Op; + +def int_hsail_fmul : InstModIntrin_2Op; + +// llvm.rint should be preferred +def int_hsail_frint : InstModIntrin_NoRound_1Op; + +def int_hsail_fsqrt : InstModIntrin_1Op; +def int_hsail_fsub : InstModIntrin_2Op; + +// llvm.trunc should be preferred +def int_hsail_ftrunc : InstModIntrin_NoRound_1Op; + + +// 5.12 Floating-Point Optimization Instruction + +// fmul + fadd or llvm.fmuladd should be preferred +def int_hsail_fmad : InstModIntrin_3Op; + +// 5.13 Floating-Point Bit Instructions + +def int_hsail_class : Intrinsic< + [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem] +>; + +// 5.14 Native Floating-Point Instructions +def int_hsail_ncos : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +def int_hsail_nexp2 : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +def int_hsail_nfma : Intrinsic< + [llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem] +>; + +def int_hsail_nlog2 : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +def int_hsail_nrcp : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +// Prefer llvm.sqrt +def int_hsail_nrsqrt : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +def int_hsail_nsin : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +// Prefer llvm.sqrt +def int_hsail_nsqrt : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] +>; + +// 5.15 Multimedia Instructions +def int_hsail_bitalign : GCCBuiltin<"__builtin_hsail_bitalign">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_bytealign : GCCBuiltin<"__builtin_hsail_bytealign">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_lerp : GCCBuiltin<"__builtin_hsail_lerp">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_packcvt : GCCBuiltin<"__builtin_hsail_packcvt">, + Intrinsic<[llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], + [IntrNoMem] +>; + +def int_hsail_unpackcvt : GCCBuiltin<"__builtin_hsail_unpackcvt">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_sad_u32_u32 : + GCCBuiltin<"__builtin_hsail_sad_u32_u32">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_sad_u32_u16x2 : + GCCBuiltin<"__builtin_hsail_sad_u32_u16x2">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_sad_u32_u8x4 : + GCCBuiltin<"__builtin_hsail_sad_u32_u8x4">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, 
llvm_i32_ty], + [IntrNoMem] +>; + +def int_hsail_sadhi : GCCBuiltin<"__builtin_hsail_sadhi">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem] +>; + +// 5.16 Segment Checking (segmentp) Instruction +def int_hsail_segmentp : GCCBuiltin<"__builtin_hsail_segmentp">, + Intrinsic< + [llvm_i1_ty], + [llvm_i32_ty, llvm_i1_ty, llvm_hsail_flat_ptr_ty], + [IntrNoMem]>; + +// 6.9 Memory Fence (memfence) Instruction +def int_hsail_memfence : GCCBuiltin<"__builtin_hsail_memfence">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>; + +// 7.6 Image Fence (imagefence) Instruction; +// FIXME: Probably doesn't really need noduplicate or convergent +def int_hsail_imagefence : GCCBuiltin<"__builtin_hsail_imagefence">, + Intrinsic<[], [], []>; + +// 9.1 Barrier Instructions +def int_hsail_barrier : GCCBuiltin<"__builtin_hsail_barrier">, + Intrinsic<[], [llvm_i32_ty], [IntrNoDuplicate, IntrConvergent]>; + +def int_hsail_wavebarrier : GCCBuiltin<"__builtin_hsail_wavebarrier">, + Intrinsic<[], [], [IntrNoDuplicate, IntrConvergent]>; + + +// 9.4 Cross-Lane Instructions +def int_hsail_activelanecount : + GCCBuiltin<"__builtin_hsail_activelanecount">, + Intrinsic< + [llvm_i32_ty], + [llvm_i32_ty, llvm_i1_ty], + [IntrReadMem, IntrConvergent] // XXX - IntrReadMem? +>; + +def int_hsail_activelaneid : + GCCBuiltin<"__builtin_hsail_activelaneid">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>; + +def int_hsail_activelanemask : Intrinsic< + [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_i32_ty, // width + llvm_i1_ty], // src + [IntrReadMem, IntrConvergent] +>; + +def int_hsail_activelanepermute : Intrinsic< + [llvm_anyint_ty], + [llvm_i32_ty, // width + LLVMMatchType<0>, // src + llvm_i32_ty, // laneId + LLVMMatchType<0>, // identity + llvm_i1_ty], // useIdentity + [IntrConvergent] +>; + +// 11.1 Kernel Dispatch Packet Instructions +def int_hsail_currentworkgroupsize : + GCCBuiltin<"__builtin_hsail_currentworkgroupsize">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_currentworkitemflatid : + GCCBuiltin<"__builtin_hsail_currentworkitemflatid">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +def int_hsail_dim : GCCBuiltin<"__builtin_hsail_dim">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +def int_hsail_gridgroups : GCCBuiltin<"__builtin_hsail_gridgroups">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_gridsize : GCCBuiltin<"__builtin_hsail_gridsize">, + Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem]>; + +// int_hsail_packetcompletionsig +def int_hsail_packetid : GCCBuiltin<"__builtin_hsail_packetid">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + +def int_hsail_workgroupid : GCCBuiltin<"__builtin_hsail_workgroupid">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_workgroupsize : + GCCBuiltin<"__builtin_hsail_workgroupsize">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_workitemabsid : + Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_workitemflatabsid : + Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; + +def int_hsail_workitemflatid : + GCCBuiltin<"__builtin_hsail_workitemflatid">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +def int_hsail_workitemid : GCCBuiltin<"__builtin_hsail_workitemid">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + +// 11.4 Miscellaneous Instructions +def int_hsail_clock : GCCBuiltin<"__builtin_hsail_clock">, + Intrinsic<[llvm_i64_ty], [], []>; + +def int_hsail_cuid : 
GCCBuiltin<"__builtin_hsail_cuid">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +// debugtrap + +def int_hsail_groupbaseptr : + GCCBuiltin<"__builtin_hsail_groupbaseptr">, + Intrinsic<[llvm_hsail_group_ptr_ty], [], [IntrNoMem]>; + +def int_hsail_kernargbaseptr : + GCCBuiltin<"__builtin_hsail_kernargbaseptr">, + Intrinsic<[llvm_hsail_kernarg_ptr_ty], [], [IntrNoMem]>; + +def int_hsail_laneid : GCCBuiltin<"__builtin_hsail_laneid">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +def int_hsail_maxcuid : GCCBuiltin<"__builtin_hsail_maxcuid">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +def int_hsail_maxwaveid : GCCBuiltin<"__builtin_hsail_maxwaveid">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + +// nop +// nullptr + +def int_hsail_waveid : GCCBuiltin<"__builtin_hsail_waveid">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + + +// GCN extensions +def int_hsail_gcn_fldexp : Intrinsic< + [llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem] +>; + +def int_hsail_gcn_bfm : GCCBuiltin<"__builtin_hsail_gcn_bfm">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_hsail_gcn_msad : GCCBuiltin<"__builtin_hsail_gcn_msad">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_hsail_gcn_qsad : GCCBuiltin<"__builtin_hsail_gcn_qsad">, + Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + +def int_hsail_gcn_mqsad: GCCBuiltin<"__builtin_hsail_gcn_mqsad">, + Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>; + +// def int_hsail_gcn_mqsad4 + +def int_hsail_gcn_sadw : GCCBuiltin<"__builtin_hsail_gcn_sadw">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +def int_hsail_gcn_sadd : GCCBuiltin<"__builtin_hsail_gcn_sadd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + +// def int_hsail_gcn_consume +// def int_hsail_gcn_append + +} // End TargetPrefix = "hsail" Index: include/llvm/Support/ELF.h =================================================================== --- include/llvm/Support/ELF.h +++ include/llvm/Support/ELF.h @@ -234,6 +234,8 @@ // Controller EM_CE = 119, // Freescale Communication Engine RISC core EM_M32C = 120, // Renesas M32C series microprocessors + EM_HSAIL = 128, // HSAIL 32bit + EM_HSAIL_64 = 129, // HSAIL 64bit EM_TSK3000 = 131, // Altium TSK3000 core EM_RS08 = 132, // Freescale RS08 embedded processor EM_SHARC = 133, // Analog Devices SHARC family of 32-bit DSP Index: lib/Target/HSAIL/AMDOpenCLKernenv.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/AMDOpenCLKernenv.h @@ -0,0 +1,69 @@ +//===-- AMDOpenCLKernenv.h - HSAIL kernenv for OpenCL -----*- C++ -*---=======// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Declare OpenCL dispatch-specific constants that are passed +/// as additional arguments (the "kernenv") to the HSAIL kernel. 
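// --- Illustrative note (sketch, not part of the patch) ----------------------
// The KE_* IDs and the getKernenvArg* helpers declared below are intended to
// be queried when the extra "kernenv" arguments are appended to a kernel's
// signature. A minimal sketch of that use, assuming a 64-bit target;
// appendKernenvArgs and the IsOCL20 flag are hypothetical.

#include "AMDOpenCLKernenv.h"
#include <string>
#include <vector>

static void appendKernenvArgs(std::vector<llvm::Type *> &ArgTys,
                              std::vector<std::string> &ArgNames,
                              llvm::LLVMContext &Ctx, bool IsOCL20) {
  unsigned NumArgs = IsOCL20 ? llvm::HSAIL::KE_OCL20_NUM_ARGS
                             : llvm::HSAIL::KE_OCL12_NUM_ARGS;
  for (unsigned ID = 0; ID != NumArgs; ++ID) {
    // size_t-like arguments are i64 on a 64-bit target; the printf buffer is
    // an i8 addrspace(1)* (see getKernenvArgType below).
    ArgTys.push_back(llvm::getKernenvArgType(ID, Ctx, /*is64bit=*/true));
    ArgNames.push_back(llvm::getKernenvArgName(ID).str());
  }
}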
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_AMDOPENCLKERNENV_H +#define LLVM_LIB_TARGET_HSAIL_AMDOPENCLKERNENV_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +class LLVMContext; + +namespace HSAIL { +enum { + KE_GOFFSET_0, + KE_GOFFSET_1, + KE_GOFFSET_2, + KE_OCL12_NUM_ARGS, + KE_PRINTF_BFR = KE_OCL12_NUM_ARGS, + KE_VQ_PTR, + KE_AQLWRAP_PTR, + KE_OCL20_NUM_ARGS, + KE_NUM_ARGS = KE_OCL20_NUM_ARGS // Always the last member. +}; +} + +static inline StringRef getKernenvArgName(unsigned ID) { + switch (ID) { + case HSAIL::KE_GOFFSET_0: + return "__global_offset_0"; + case HSAIL::KE_GOFFSET_1: + return "__global_offset_1"; + case HSAIL::KE_GOFFSET_2: + return "__global_offset_2"; + case HSAIL::KE_PRINTF_BFR: + return "__printf_buffer"; + case HSAIL::KE_VQ_PTR: + return "__vqueue_pointer"; + case HSAIL::KE_AQLWRAP_PTR: + return "__aqlwrap_pointer"; + default: + llvm_unreachable("unexpected Kernenv argument ID"); + } +} + +static inline Type *getKernenvArgType(unsigned ID, LLVMContext &C, + bool is64bit) { + if (ID == HSAIL::KE_PRINTF_BFR) + return Type::getInt8PtrTy(C, 1); + return (is64bit ? Type::getInt64Ty(C) : Type::getInt32Ty(C)); +} + +static inline StringRef getKernenvArgTypeName(unsigned ID) { return "size_t"; } +} + +#endif Index: lib/Target/HSAIL/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/CMakeLists.txt @@ -0,0 +1,54 @@ + +include(CMakeDependentOption) + +set(LLVM_TARGET_DEFINITIONS HSAIL.td) + +tablegen(LLVM HSAILGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM HSAILGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM HSAILGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM HSAILGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM HSAILGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM HSAILGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM HSAILGenCallingConv.inc -gen-callingconv) +tablegen(LLVM HSAILGenIntrinsics.inc -gen-tgt-intrinsic) + +add_public_tablegen_target(HSAILCommonTableGen) + +add_subdirectory(HSAILUtil) + +set(sources + HSAILAsmPrinter.cpp + HSAILAlwaysInlinePass.cpp + HSAILELFTargetObjectFile.cpp + HSAILFrameLowering.cpp + HSAILInstrInfo.cpp + HSAILIntrinsicInfo.cpp + HSAILISelDAGToDAG.cpp + HSAILISelLowering.cpp + HSAILMachineFunctionInfo.cpp + HSAILMCInstLower.cpp + HSAILModuleInfo.cpp + HSAILParamManager.cpp + HSAILRegisterInfo.cpp + HSAILSection.cpp + HSAILStoreInitializer.cpp + HSAILSubtarget.cpp + HSAILTargetMachine.cpp + HSAILUtilityFunctions.cpp + HSAILOpaqueTypes.cpp + ) + +if( CMAKE_CL_64 ) + # A workaround for a bug in cmake 2.8.3. See PR 8885. + if( CMAKE_VERSION STREQUAL "2.8.3" ) + include(CMakeDetermineCompilerId) + endif() +endif() + +add_llvm_target(HSAILCodeGen ${sources}) + +target_link_libraries(LLVMHSAILCodeGen PRIVATE LLVMHSAILUtil) + +add_subdirectory(InstPrinter) +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) Index: lib/Target/HSAIL/HSAIL.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAIL.h @@ -0,0 +1,133 @@ +//===-- HSAIL.h - Top-level interface for HSAIL representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAIL_H +#define LLVM_LIB_TARGET_HSAIL_HSAIL_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCInstrDesc.h" + +#include "MCTargetDesc/HSAILMCTargetDesc.h" + +namespace llvm { + +class FunctionPass; +class HSAILTargetMachine; +class LoopPass; +class ModulePass; +class Target; + +/// Insert kernel index metadata for device enqueue. +ModulePass *createHSAILInsertKernelIndexMetadataPass(); + +/// This pass converts a legalized DAG into a HSAIL-specific DAG, ready for +/// instruction scheduling. +FunctionPass *createHSAILISelDag(TargetMachine &TM); + +ModulePass *createHSAILAlwaysInlinePass(); + +extern Target TheHSAIL_32Target, TheHSAIL_64Target; + +} // End llvm namespace + +namespace llvm { +namespace HSAILAS { + +enum AddressSpaces { + PRIVATE_ADDRESS = 0, + GLOBAL_ADDRESS = 1, + READONLY_ADDRESS = 2, + GROUP_ADDRESS = 3, + FLAT_ADDRESS = 4, + REGION_ADDRESS = 5, + SPILL_ADDRESS = 6, + KERNARG_ADDRESS = 7, + ARG_ADDRESS = 8, + ADDRESS_NONE = 9 +}; +} + +// Target flags from tablegen +// See HSAILInstFormats.td +namespace HSAILInstrFlags { +enum { + // Instruction kind. + InstAddr = 1 << 3, + InstAtomic = 1 << 4, + InstBasic = 1 << 5, + InstBr = 1 << 6, + InstCmp = 1 << 7, + InstCvt = 1 << 8, + InstImage = 1 << 9, + InstLane = 1 << 10, + InstMem = 1 << 11, + InstMemFence = 1 << 12, + InstMod = 1 << 13, + InstQueryImage = 1 << 14, + InstQuerySampler = 1 << 15, + InstQueue = 1 << 16, + InstSeg = 1 << 17, + InstSegCvt = 1 << 18, + InstSignal = 1 << 19, + InstSourceType = 1 << 20, + + // Others. + IS_CONV = 1 << 23, + IS_IMAGEINST = 1 << 24, + + // Default modifier attributes. Used for marking default values of a + // modifier for an instruction to skip printing it. + RoundAttrLo = 1 << 25, // 2 bits + RoundAttrHi = 1 << 26, + RoundAttr = RoundAttrLo | RoundAttrHi, + + WidthAttrLo = 1 << 27, // 2 bits + WidthAttrHi = 1 << 28, + WidthAttr = WidthAttrLo | WidthAttrHi, + + HasDefaultSegment = 1 << 29, + + InstBrigOpcodeLo = UINT64_C(1) << 48, + InstBrigOpcode = UINT64_C(0xffff) << 48 +}; +} + +namespace HSAILWidthAttrFlags { +enum { + WidthAttrNone = 0, + WidthAttrAll = 1, + WidthAttrWaveSize = 2, + WidthAttrOne = 3 +}; +} + +/// Enum for memory operand decoding +namespace HSAILADDRESS { +enum { BASE = 0, REG = 1, OFFSET = 2, ADDRESS_NUM_OPS }; +} + +/// Target architectures to optimize for +enum OptimizeForTargetArch { + GENERIC, ///< No target specific flavor + SI ///< Optimize fot Southern Islands family +}; + +namespace HSAIL { +enum OperandType { + /// Operand with register or immediate. + OPERAND_REG_IMM = llvm::MCOI::OPERAND_FIRST_TARGET +}; +} +} + +#define IMAGE_ARG_BIAS (1 << 16) + +#endif Index: lib/Target/HSAIL/HSAIL.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAIL.td @@ -0,0 +1,59 @@ +//===----- HSAIL.td - Target definition file for HSAIL ----*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// HSAIL Subtarget features. 
+//===----------------------------------------------------------------------===//
+
+def FeatureImages : SubtargetFeature<"images",
+                                     "HasImages",
+                                     "true",
+                                     "Enable image precision operations">;
+
+def FeatureGCN : SubtargetFeature<"gcn",
+                                  "IsGCN",
+                                  "true",
+                                  "Enable AMD GCN extensions">;
+
+//===----------------------------------------------------------------------===//
+// HSAIL processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+  : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"kaveri", [FeatureImages, FeatureGCN]>;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "HSAILRegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "HSAILInstrInfo.td"
+
+def HSAILInstrInfo : InstrInfo;
+
+def BRIGAsmWriter : AsmWriter {
+  field bit isMCAsmWriter = 0;
+}
+
+def HSAIL : Target {
+  let InstructionSet = HSAILInstrInfo;
+
+  let AssemblyWriters = [BRIGAsmWriter];
+}
+
Index: lib/Target/HSAIL/HSAILAlwaysInlinePass.cpp
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/HSAILAlwaysInlinePass.cpp
@@ -0,0 +1,69 @@
+//===-- HSAILAlwaysInlinePass.cpp - Force inlining of functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass marks all internal functions as always_inline and creates
+/// duplicates of all other functions and marks the duplicates as always_inline.
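// --- Illustrative note (sketch, not part of the patch) ----------------------
// One way this pass could be scheduled: pair it with the generic always-
// inliner so the attribute it adds is actually acted on. Where the target
// inserts these (e.g. from its addIRPasses hook) is an assumption here;
// createHSAILAlwaysInlinePass() itself is declared in HSAIL.h above.

#include "HSAIL.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"

static void addHSAILInlinePasses(llvm::legacy::PassManagerBase &PM) {
  PM.add(llvm::createHSAILAlwaysInlinePass()); // mark/clone for inlining
  PM.add(llvm::createAlwaysInlinerPass());     // inline the marked bodies
}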
+// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +namespace { + +class HSAILAlwaysInline : public ModulePass { + + static char ID; + +public: + HSAILAlwaysInline() : ModulePass(ID) {} + bool runOnModule(Module &M) override; + const char *getPassName() const override { + return "HSAIL Always Inline Pass"; + } +}; + +} // End anonymous namespace + +char HSAILAlwaysInline::ID = 0; + +bool HSAILAlwaysInline::runOnModule(Module &M) { + + std::vector FuncsToClone; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function &F = *I; + if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() && + !F.hasFnAttribute(Attribute::NoInline)) + FuncsToClone.push_back(&F); + } + + for (Function *F : FuncsToClone) { + ValueToValueMapTy VMap; + Function *NewFunc = CloneFunction(F, VMap, false); + NewFunc->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(NewFunc); + F->replaceAllUsesWith(NewFunc); + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + Function &F = *I; + if (F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::NoInline)) { + F.addFnAttr(Attribute::AlwaysInline); + } + } + return false; +} + +ModulePass *llvm::createHSAILAlwaysInlinePass() { + return new HSAILAlwaysInline(); +} Index: lib/Target/HSAIL/HSAILArithmetic.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILArithmetic.td @@ -0,0 +1,458 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +defm ADD : InstMod_2Op_IntTypes<"add", BrigOpcode.ADD>; +defm ADD : InstMod_2Op_FPTypes<"add", BrigOpcode.ADD>; + +defm SUB : InstMod_2Op_IntTypes<"sub", BrigOpcode.SUB>; +defm SUB : InstMod_2Op_FPTypes<"sub", BrigOpcode.SUB>; + +defm MUL : InstMod_2Op_IntTypes<"mul", BrigOpcode.MUL>; +defm MUL : InstMod_2Op_FPTypes<"mul", BrigOpcode.MUL>; + +defm DIV : InstMod_2Op_IntTypes<"div", BrigOpcode.DIV>; +defm DIV : InstMod_2Op_FPTypes<"div", BrigOpcode.DIV>; + +defm REM : InstBasic_2Op_IntTypes<"rem", BrigOpcode.REM>; + +defm MULHI : InstBasic_2Op_IntTypes<"mulhi", BrigOpcode.MULHI>; + +defm ABS : InstMod_1Op_SignedIntTypes<"abs", BrigOpcode.ABS>; +defm ABS : InstMod_1Op_FPTypes<"abs", BrigOpcode.ABS>; + +defm : InstMod_2Op_IntTypes_Pat<"ADD", add>; +defm : InstMod_2Op_FPTypes_Pat<"ADD", fadd>; +defm : InstMod_2Op_Intrin_FPTypes_Pat<"ADD", int_hsail_fadd>; + +defm : InstMod_2Op_IntTypes_Pat<"SUB", sub, 0>; +defm : InstMod_2Op_FPTypes_Pat<"SUB", fsub>; +defm : InstMod_2Op_Intrin_FPTypes_Pat<"SUB", int_hsail_fsub>; + +defm : InstMod_2Op_IntTypes_Pat<"MUL", mul, 0>; +defm : InstMod_2Op_FPTypes_Pat<"MUL", fmul>; +defm : InstMod_2Op_Intrin_FPTypes_Pat<"MUL", int_hsail_fmul>; + +defm : InstMod_2Op_IntTypes_Pat<"DIV", udiv, 0>; +defm : InstMod_2Op_IntTypes_Pat<"DIV", sdiv, 1>; +defm : InstMod_2Op_FPTypes_Pat<"DIV", fdiv>; +defm : InstMod_2Op_Intrin_FPTypes_Pat<"DIV", int_hsail_fdiv>; + +// Division without implicit ftz. 
+def : InstMod_2Op_Pat; + +defm : InstBasic_2Op_IntTypes_Pat<"REM", urem, 0>; +defm : InstBasic_2Op_IntTypes_Pat<"REM", srem, 1>; + +defm : InstBasic_2Op_IntTypes_Pat<"MULHI", mulhu, 0>; +defm : InstBasic_2Op_IntTypes_Pat<"MULHI", mulhs, 1>; + + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; + +defm MAD : InstBasic_3Op_SUF<"mad", BrigOpcode.MAD>; +defm : InstBasic_3Op_IntTypes_Pat<"MAD", HSAILumad, 0>; + +// u[32,64] forms illegal in HSAIL +defm NEG : InstBasic_1Op_SF<"neg", BrigOpcode.NEG>; + +let AddedComplexity = 2 in { +// Needs to match before sub pattern. +defm : InstBasic_1Op_IntTypes_Pat<"NEG", ineg, 1>; +} + +defm : InstBasic_1Op_FPTypes_Pat<"NEG", fneg>; + +defm COPYSIGN : InstMod_2Op_FPTypes<"copysign", BrigOpcode.COPYSIGN>; + +// FIXME: Need to support fcopysign with different second operand type. +def : InstMod_2Op_Pat; +def : InstMod_2Op_Pat; + + +defm FMA : InstMod_3Op_FPTypes<"fma", BrigOpcode.FMA>; +defm : InstMod_3Op_FPTypes_Pat<"FMA", fma>; + +defm NFMA : InstBasic_3Op_FPTypes<"nfma", BrigOpcode.NFMA>; +defm : InstBasic_3Op_FPTypes_Pat<"NFMA", HSAILnfma>; + +defm MAX : InstMod_2Op_SUF<"max", BrigOpcode.MAX>; +defm MIN : InstMod_2Op_SUF<"min", BrigOpcode.MIN>; + +defm : InstMod_2Op_FPTypes_Pat<"MAX", fmaxnum, BrigRound.NONE>; +defm : InstMod_2Op_FPTypes_Pat<"MIN", fminnum, BrigRound.NONE>; + +defm : InstMod_2Op_IntTypes_Pat<"MAX", HSAILumax, 0>; +defm : InstMod_2Op_IntTypes_Pat<"MAX", HSAILsmax, 1>; + +defm : InstMod_2Op_IntTypes_Pat<"MIN", HSAILumin, 0>; +defm : InstMod_2Op_IntTypes_Pat<"MIN", HSAILsmin, 1>; + +defm GCN_MAX : InstBasic_2Op_FPTypes<"gcn_max", BrigOpcode.GCNMAX>; +defm GCN_MIN : InstBasic_2Op_FPTypes<"gcn_min", BrigOpcode.GCNMIN>; + +// FIXME: Remove these intrinsics +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + + +defm MUL24 : InstBasic_2Op_IntTypes<"mul24", BrigOpcode.MUL24>; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + + +defm MAD24 : InstBasic_3Op_IntTypes<"mad24", BrigOpcode.MAD24>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm SQRT : InstMod_1Op_FPTypes<"sqrt", BrigOpcode.SQRT>; +defm NSQRT : InstBasic_1Op_FPTypes<"nsqrt", BrigOpcode.NSQRT>; +defm NRSQRT : InstBasic_1Op_FPTypes<"nrsqrt", BrigOpcode.NRSQRT>; +defm NRCP : InstBasic_1Op_FPTypes<"nrcp", BrigOpcode.NRCP>; + +defm : InstMod_1Op_FPTypes_Intrin_Pat<"SQRT", int_hsail_fsqrt>; +defm : InstBasic_1Op_FPTypes_Pat<"NSQRT", int_hsail_nsqrt>; +defm : InstBasic_1Op_FPTypes_Pat<"NRSQRT", int_hsail_nrsqrt>; +defm : InstBasic_1Op_FPTypes_Pat<"NRCP", int_hsail_nrcp>; + +defm FRACT : InstMod_1Op_FPTypes<"fract", BrigOpcode.FRACT>; +defm : InstMod_1Op_FPTypes_Pat<"FRACT", HSAILfract, BrigRound.FLOAT_DEFAULT>; + +// Native Floating-Point Special Functions Operations +def NSIN_F32 : HSAILInstBasic_1Op<"nsin", BrigOpcode.NSIN, Inst_F32_F32>; +def NCOS_F32 : HSAILInstBasic_1Op<"ncos", BrigOpcode.NCOS, Inst_F32_F32>; +def NEXP2_F32 : HSAILInstBasic_1Op<"nexp2", BrigOpcode.NEXP2, Inst_F32_F32>; +def NLOG2_F32 : HSAILInstBasic_1Op<"nlog2", BrigOpcode.NLOG2, Inst_F32_F32>; + +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; + +// Legacy intrinsic patterns. 
+def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; + +// FIXME: Having the ftz explicitly in this one is totally inconsistent +def : InstMod_1Op_Pat; + +def : InstMod_1Op_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// Logical bit ops. + +defm AND : InstBasic_2Op_BitTypes<"and", BrigOpcode.AND>; +defm OR : InstBasic_2Op_BitTypes<"or", BrigOpcode.OR>; +defm XOR : InstBasic_2Op_BitTypes<"xor", BrigOpcode.XOR>; + +defm : InstBasic_2Op_BitIntTypes_Pat<"AND", and>; +defm : InstBasic_2Op_BitIntTypes_Pat<"OR", or>; +defm : InstBasic_2Op_BitIntTypes_Pat<"XOR", xor>; + +//////////////////////////////////////////////////////////////////////////////// +// not (integer, 1-bit, 32-bit, and 64-bit) +defm NOT : InstBasic_1Op_BitTypes<"not", BrigOpcode.NOT>; +defm : InstBasic_1Op_BitTypes_Pat<"NOT", not>; + +//////////////////////////////////////////////////////////////////////////////// +// cvt + +defm CVT : InstCvt_DestTypes<"cvt", BrigOpcode.CVT>; + +//////////////////////////////////////////////////////////////////////////////// +// popcount (integer, 1-bit, 32-bit, and 64-bit) + +// Patterns for integer intrinsics that match to InstSourceType. The +// LLVM intrinsics return an integer with equivalent width as the +// source, but the HSAIL instructions truncate to a 32-bit result. +multiclass InstSourceType_1Op_Pats { + def : Pat< + (i64 (node i64:$src0)), + (CVT_U64_U32 0, 0, BrigType.U64, BrigType.U32, + (i32 (!cast(inst#!if(bit_src, "_B64", "_U64")) i64:$src0, BrigType.U32, !if(bit_src, BrigType.B64, BrigType.U64)))) + >; + + def : Pat< + (i32 (trunc (node i64:$src0))), + (i32 (!cast(inst#!if(bit_src, "_B64", "_U64")) i64:$src0, BrigType.U32, !if(bit_src, BrigType.B64, BrigType.U64))) + >; + + def : Pat< + (i32 (node i32:$src0)), + (!cast(inst#!if(bit_src, "_B32", "_U32")) i32:$src0, BrigType.U32, !if(bit_src, BrigType.B32, BrigType.U32)) + >; +} + +defm POPCOUNT : InstSourceType_1Op_U32_BitTypes<"popcount", BrigOpcode.POPCOUNT>; + +defm : InstSourceType_1Op_Pats<"POPCOUNT_U32", ctpop, 1>; + +//////////////////////////////////////////////////////////////////////////////// +// firstbit (integer, 1-bit, 32-bit, and 64-bit) + +defm FIRSTBIT : InstSourceType_1Op_U32_IntTypes<"firstbit", BrigOpcode.FIRSTBIT>; +defm : InstSourceType_1Op_Pats<"FIRSTBIT_U32", ctlz>; + +def : InstSourceType_1Op_Pat; +def : InstSourceType_1Op_Pat; +def : InstSourceType_1Op_Pat; +def : InstSourceType_1Op_Pat; + +def : InstSourceType_1Op_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// lastbit (integer, 1-bit, 32-bit, and 64-bit) +defm LASTBIT : InstSourceType_1Op_U32_IntTypes<"lastbit", BrigOpcode.LASTBIT>; + +defm : InstSourceType_1Op_Pats<"LASTBIT_U32", cttz>; + +def : InstSourceType_1Op_Pat; +def : InstSourceType_1Op_Pat; + +def : InstSourceType_1Op_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// signbit (integer, 1-bit, 32-bit, and 64-bit) + +//////////////////////////////////////////////////////////////////////////////// +// shl (integer, signed and unsigned, 32-bit and 64-bit) +// shr (integer, signed and unsigned, 32-bit and 64-bit) +defm SHL : InstBasic_2Op_ShiftTypes<"shl", BrigOpcode.SHL>; +defm SHR : InstBasic_2Op_ShiftTypes<"shr", BrigOpcode.SHR>; + +def : InstBasic_2Op_Pat; 
+def : InstBasic_2Op_Pat; + +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// cmov (1-bit, 32-bit, and 64-bit; integer and float) + +defm CMOV : InstBasic_3Op_CMov<"cmov", BrigOpcode.CMOV>; + +// Reduce complexity to prefer selecting various min / max patterns on +// select. +let AddedComplexity = -10 in { + def : InstBasic_CMov_Pat; + def : InstBasic_CMov_Pat; + def : InstBasic_CMov_Pat; +} + +//////////////////////////////////////////////////////////////////////////////// +// extract (integer, 32-bit, and 64-bit) + +//////////////////////////////////////////////////////////////////////////////// +// insert (integer, 32-bit, and 64-bit) + +//////////////////////////////////////////////////////////////////////////////// +// mov + +let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + // FIXME: Spec calls TypeLength moveType for some reason, but that + // would be painful to deal with. + defm MOV : InstBasic_1Op_BF<"mov", BrigOpcode.MOV>; +} + + +// FIXME: Omitting 'node' in the output pattern results in invalid +// output with a mov using and defining the same register. +class MovImmPat : Pat < + (vt node:$src0), + (movInst node:$src0, bt) +>; + +class BitconvertMovPat : Pat < + (destvt (bitconvert srcvt:$src0)), + (movInst $src0, bt) +>; + + +def : MovImmPat; +def : MovImmPat; +def : MovImmPat; +def : MovImmPat; +def : MovImmPat; + +def : BitconvertMovPat; +def : BitconvertMovPat; +def : BitconvertMovPat; +def : BitconvertMovPat; + + +//////////////////////////////////////////////////////////////////////////////// +// misc operations + +defm BITSELECT : InstBasic_3Op_BitTypes<"bitselect", BrigOpcode.BITSELECT>; +defm : InstBasic_3Op_BitTypes_Pat<"BITSELECT", HSAILbitselect>; + +defm PACK : InstSourceType_3Op_Pack_Types<"pack", BrigOpcode.PACK>; + +// Media - packcvt +def PACKCVT_U8X4_F32 : HSAILInstSourceType_4Op<"packcvt", BrigOpcode.PACKCVT, Inst_U8X4_F32_F32_F32_F32>; + +def : InstSourceType_4Op_Pat; + +def : InstSourceType_4Op_Pat; + +// Media - unpackcvt + +// TODO: src1 must be immediate. 
+def UNPACKCVT_F32_U8X4 : HSAILInstSourceType_2Op<"unpackcvt", BrigOpcode.UNPACKCVT, Inst_F32_U8X4_U32>; + +def : InstSourceType_2Op_Pat; + +def : InstSourceType_2Op_Pat; + +defm BITALIGN : InstBasic_3Op_BitTypes<"bitalign", BrigOpcode.BITALIGN>; +defm BYTEALIGN : InstBasic_3Op_BitTypes<"bytealign", BrigOpcode.BYTEALIGN>; + +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +def LERP_U8X4 : HSAILInstBasic_3Op<"lerp", BrigOpcode.LERP, Inst_U8X4_U8X4_U8X4_U8X4>; + +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm SAD : InstSourceType_3Op_Sad_Types<"sad", BrigOpcode.SAD>; + +def : InstSourceType_3Op_Pat; + +def : InstSourceType_3Op_Pat; + +def : InstSourceType_3Op_Pat; + +def : InstSourceType_3Op_Pat; + +def SADHI_U16X2_U8X4 : HSAILInstSourceType_3Op<"sadhi", BrigOpcode.SADHI, Inst_U16X2_U8X4_U8X4_U16X2>; + +def : InstSourceType_3Op_Pat; + +def : InstSourceType_3Op_Pat; + +// media_ops2 + +def GCN_MSAD_B32 : HSAILInstBasic_3Op<"gcn_msad", BrigOpcode.GCNMSAD, Inst_B32_B32_B32_B32>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +def GCN_SADW_B32 : HSAILInstBasic_3Op<"gcn_sadw", BrigOpcode.GCNSADW, Inst_B32_B32_B32_B32>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +def GCN_SADD_B32 : HSAILInstBasic_3Op<"gcn_sadd", BrigOpcode.GCNSADD, Inst_B32_B32_B32_B32>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm GCN_MIN3 : InstBasic_3Op_SUF<"gcn_min3", BrigOpcode.GCNMIN3>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm GCN_MAX3 : InstBasic_3Op_SUF<"gcn_max3", BrigOpcode.GCNMAX3>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm GCN_MED3 : InstBasic_3Op_SUF<"gcn_med3", BrigOpcode.GCNMED3>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +defm BITEXTRACT : InstBasic_3Op_BitExtract<"bitextract", BrigOpcode.BITEXTRACT>; +defm : InstBasic_3Op_BitExtract_IntTypes_Pat<"BITEXTRACT", HSAILubitextract, 0>; +defm : InstBasic_3Op_BitExtract_IntTypes_Pat<"BITEXTRACT", HSAILsbitextract, 1>; + +defm BITINSERT : InstBasic_4Op_BitInsert<"bitinsert", BrigOpcode.BITINSERT>; +defm : InstBasic_4Op_BitInsert_IntTypes_Pat<"BITINSERT", int_hsail_sbitinsert, 1>; +defm : InstBasic_4Op_BitInsert_IntTypes_Pat<"BITINSERT", int_hsail_ubitinsert, 0>; + +defm BITMASK : InstBasic_2Op_BitRetType_U32_U32<"bitmask", BrigOpcode.BITMASK>; +defm : InstBasic_2Op_BitRetType_U32_U32_Pat<"BITMASK", int_hsail_bitmask>; + +defm BITREV : InstBasic_1Op_BitTypes<"bitrev", BrigOpcode.BITREV>; +defm : InstBasic_1Op_BitTypes_Pat<"BITREV", int_hsail_bitrev>; + +def GCN_BFM_B32 : HSAILInstBasic_2Op<"gcn_bfm", BrigOpcode.GCNBFM, Inst_B32_B32_B32>; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; + +def GCN_QSAD_B64 : HSAILInstBasic_3Op<"gcn_qsad", BrigOpcode.GCNQSAD, Inst_B64_B64_B64_B64>; +def : InstBasic_3Op_Pat; +def : InstBasic_3Op_Pat; + +def GCN_MQSAD : HSAILInstBasic_3Op<"gcn_mqsad", BrigOpcode.GCNMQSAD, Inst_B64_B64_B32_B64>; + +def : Pat< + (int_HSAIL_mqsad (i64 (GPROrImm i64:$src0)), i32:$src1, i64:$src2), + (GCN_MQSAD $src0, $src1, $src2, BrigType.B64) +>; + +def : Pat< + (int_hsail_gcn_mqsad (i64 (GPROrImm i64:$src0)), i32:$src1, i64:$src2), + (GCN_MQSAD $src0, $src1, $src2, BrigType.B64) +>; + + +defm GCN_FLDEXP : InstBasic_2Op_LdExp<"gcn_fldexp", BrigOpcode.GCNFLDEXP>; +def : InstBasic_2Op_Pat; +def : InstBasic_2Op_Pat; Index: lib/Target/HSAIL/HSAILAsmPrinter.h =================================================================== --- 
/dev/null +++ lib/Target/HSAIL/HSAILAsmPrinter.h @@ -0,0 +1,90 @@ +//===-- HSAILAsmPrinter.h - Print HSAIL assembly code -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief HSAIL Assembly printer class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILASMPRINTER_H +#define LLVM_LIB_TARGET_HSAIL_HSAILASMPRINTER_H + +#include "llvm/CodeGen/AsmPrinter.h" + +namespace llvm { + +class ConstantFP; +class MachineFrameInfo; + +template class SmallString; + +class HSAILAsmPrinter : public AsmPrinter { +private: + typedef std::pair AddrInit; + + StringRef getArgTypeName(Type *Ty, bool Signed = false) const; + + void emitFunctionArgument(unsigned ParamIndex, const Argument &Arg, + bool IsKernel, bool IsSExt, raw_ostream &O) const; + void emitFunctionReturn(Type *Ty, StringRef Name, bool IsKernel, bool IsSExt, + raw_ostream &O) const; + void emitFunctionLabel(const Function &F, raw_ostream &O, bool IsDecl) const; + + static char getSymbolPrefixForAddressSpace(unsigned AS); + char getSymbolPrefix(const MCSymbol &S) const; + + void printInitVarWithAddressPragma(StringRef VarName, uint64_t Offset, + const MCExpr *Expr, unsigned EltSize, + raw_ostream &O); + + void printFloat(uint32_t, raw_ostream &O); + void printDouble(uint64_t, raw_ostream &O); + void printConstantFP(const ConstantFP *CV, raw_ostream &O); + void printScalarConstant(const Constant *CV, SmallVectorImpl &Addrs, + uint64_t &TotalSizeEmitted, const DataLayout &DL, + raw_ostream &O); + + void printGVInitialValue(const GlobalValue &GV, const Constant *CV, + const DataLayout &DL, raw_ostream &O); + +public: + explicit HSAILAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer); + + bool doFinalization(Module &M) override; + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { return "HSAIL Assembly Printer"; } + + void getHSAILMangledName(SmallString<256> &Out, const GlobalValue *GV) const; + void EmitGlobalVariable(const GlobalVariable *GV) override; + void EmitStartOfAsmFile(Module &) override; + void EmitFunctionHeader() override; + void EmitFunctionEntryLabel() override; + + void checkModuleSubtargetExtensions(const Module &M, + bool &IsFullProfile, + bool &IsGCN, + bool &HasImages) const; + static bool isHSAILInstrinsic(StringRef str); + + void computeStackUsage(const MachineFrameInfo *MFI, + uint64_t &PrivateSize, + unsigned &PrivateAlign, + uint64_t &SpillSize, + unsigned &SpillAlign) const; + + void EmitFunctionBodyStart() override; + void EmitFunctionBodyEnd() override; + void EmitInstruction(const MachineInstr *MI) override; +}; + +} + +#endif Index: lib/Target/HSAIL/HSAILAsmPrinter.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILAsmPrinter.cpp @@ -0,0 +1,918 @@ +//===-- HSAILAsmPrinter.cpp - HSAIL Assembly printer ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// The HSAILAsmPrinter is used to print both assembly string and also binary +/// code. 
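// --- Illustrative note (an assumption, not output from a real run) ----------
// Based on emitFunctionLabel/emitFunctionArgument below, the function label
// printed for an external OpenCL kernel such as
//   kernel void foo(int n, global float *p)
// compiled for hsail64 would look roughly like
//   prog kernel &foo(kernarg_u32 %n, kernarg_u64 %p)
//   {
// with the kernarg_/arg_ prefix chosen by emitFunctionArgument, the type
// suffixes coming from getArgTypeName, and the parameter names taken from
// HSAILParamManager rather than from the source.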
When passed an MCAsmStreamer it prints assembly and when passed +/// an MCObjectStreamer it outputs binary code. +// +//===----------------------------------------------------------------------===// +// + +#include "HSAILAsmPrinter.h" +#include "HSAIL.h" +#include "HSAILMCInstLower.h" +#include "HSAILUtilityFunctions.h" +#include "HSAILSubtarget.h" +#include "HSAILStoreInitializer.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +using namespace llvm; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "HSAILGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + + +extern "C" void LLVMInitializeHSAILAsmPrinter() { + RegisterAsmPrinter Target32(TheHSAIL_32Target); + RegisterAsmPrinter Target64(TheHSAIL_64Target); +} + +HSAILAsmPrinter::HSAILAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)) {} + +bool HSAILAsmPrinter::doFinalization(Module &M) { + EmitEndOfAsmFile(M); + return false; +} + +bool HSAILAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + SetupMachineFunction(MF); + + // FIXME: Hack. Reset CurrentFnSym using the correctly mangled name. The + // generic code doesn't understand HSAIL's weird global symbol prefix rules, + // since LLVM doesn't have the concept of function scope globals that need a + // different prefix. + SmallString<256> Name; + getHSAILMangledName(Name, MF.getFunction()); + CurrentFnSym = OutContext.getOrCreateSymbol(Name); + + +// EmitFunctionEntryLabel(); + OutStreamer->SwitchSection(getObjFileLowering().getTextSection()); + EmitFunctionBody(); + + return false; +} + +static bool canInitAddressSpace(unsigned AS) { + return AS == HSAILAS::READONLY_ADDRESS || AS == HSAILAS::GLOBAL_ADDRESS; +} + +static StringRef getSegmentName(unsigned AS) { + switch (AS) { + case HSAILAS::GLOBAL_ADDRESS: + return "global"; + case HSAILAS::READONLY_ADDRESS: + return "readonly"; + case HSAILAS::GROUP_ADDRESS: + return "group"; + case HSAILAS::PRIVATE_ADDRESS: + return "private"; + default: + llvm_unreachable("unhandled segment"); + } +} + +static bool isProgramLinkage(const GlobalValue &GV) { + switch (GV.getLinkage()) { + case GlobalValue::ExternalLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::ExternalWeakLinkage: + case GlobalValue::AppendingLinkage: + return true; + + default: + return false; + } +} + +static bool isModuleLinkage(const GlobalValue &GV) { + return !isProgramLinkage(GV); +} + +void HSAILAsmPrinter::emitFunctionArgument(unsigned ParamIndex, + const Argument &A, bool IsKernel, + bool IsSExt, raw_ostream &O) const { + const DataLayout &DL = getDataLayout(); + Type *Ty = A.getType(); + + unsigned NElts = ~0u; + Type *EltTy = HSAIL::analyzeType(Ty, NElts, DL); + + if (NElts > 1) { + unsigned ABIAlign = DL.getABITypeAlignment(Ty); + if (ABIAlign != DL.getABITypeAlignment(EltTy)) + O << "align(" << ABIAlign << ") "; + } + + // TODO_HSA: Need to emit alignment information. + O << (IsKernel ? 
"kernarg" : "arg") << '_' << getArgTypeName(EltTy, IsSExt) + << ' ' << '%'; + + if (MF) { + const HSAILParamManager &PM = + MF->getInfo()->getParamManager(); + + O << PM.getParamName(ParamIndex); + } else { + // If we don't have a machine function, we are just printing the + // declaration. The name doesn't matter so much. + + StringRef Name = A.getName(); + if (Name.empty()) + O << "arg_p" << ParamIndex; + else + O << Name; + } + + // For vector args, we'll use an HSAIL array. + if (NElts != 0) + O << '[' << NElts << ']'; +} + +void HSAILAsmPrinter::emitFunctionReturn(Type *Ty, StringRef Name, + bool IsKernel, bool IsSExt, + raw_ostream &O) const { + const DataLayout &DL = getDataLayout(); + + unsigned NElts = ~0u; + Type *EltTy = HSAIL::analyzeType(Ty, NElts, DL); + + if (NElts > 1) { + unsigned ABIAlign = DL.getABITypeAlignment(Ty); + if (ABIAlign != DL.getABITypeAlignment(EltTy)) + O << "align(" << ABIAlign << ") "; + } + + O << (IsKernel ? "kernarg" : "arg") << '_' << getArgTypeName(EltTy, IsSExt) + << ' ' << '%' << Name; + if (NElts != 0) + O << '[' << NElts << ']'; +} + +void HSAILAsmPrinter::emitFunctionLabel(const Function &F, raw_ostream &O, + bool IsDecl) const { + Type *RetTy = F.getReturnType(); + + // FIXME: Should define HSA calling conventions. + bool IsKernel = HSAIL::isKernelFunc(&F); + + SmallString<256> Name; + getHSAILMangledName(Name, &F); + + if (!MAI->isValidUnquotedName(Name)) + report_fatal_error("Unsupported symbol name"); + + O << (IsKernel ? "kernel " : "function ") << Name << '('; + + // Functions with kernel linkage cannot have output args. + if (!IsKernel) { + if (!RetTy->isVoidTy()) { + StringRef RetName("ret"); + SmallString<256> ReturnName; + if (!IsDecl) { + getNameWithPrefix(ReturnName, &F); + RetName = ReturnName; + } + + const auto &RetAttrs = F.getAttributes().getRetAttributes(); + + bool IsSExt = + RetAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt); + bool IsZExt = + RetAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt); + + if (IsSExt || IsZExt) { + emitFunctionReturn(Type::getInt32Ty(RetTy->getContext()), RetName, + IsKernel, IsSExt, O); + } else + emitFunctionReturn(RetTy, RetName, IsKernel, IsSExt, O); + } + + O << ")("; + } + + const auto &Attrs = F.getAttributes(); + + // Avoid ugly line breaks with small argument lists. + unsigned NArgs = F.arg_size(); + if (NArgs == 0) { + O << ')'; + } else if (NArgs == 1) { + bool IsSExt = Attrs.hasAttribute(1, Attribute::SExt); + emitFunctionArgument(0, *F.arg_begin(), IsKernel, IsSExt, O); + O << ')'; + } else { + O << "\n\t"; + + // Loop through all of the parameters and emit the types and corresponding + // names. + unsigned Index = 0; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++Index) { + bool IsSExt = Attrs.hasAttribute(Index + 1, Attribute::SExt); + emitFunctionArgument(Index, *I++, IsKernel, IsSExt, O); + if (I != E) + O << ",\n\t"; + } + + O << ')'; + } +} + +// FIXME: Doesn't make sense to rely on address space for this. +char HSAILAsmPrinter::getSymbolPrefixForAddressSpace(unsigned AS) { + return (AS == HSAILAS::GROUP_ADDRESS || AS == HSAILAS::PRIVATE_ADDRESS) ? 
'%' + : '&'; +} + +// FIXME: Duplicated in BRIGAsmPrinter +char HSAILAsmPrinter::getSymbolPrefix(const MCSymbol &Sym) const { + const GlobalVariable *GV = MMI->getModule()->getNamedGlobal(Sym.getName()); + assert(GV && "Need prefix for undefined GlobalVariable"); + + unsigned AS = GV->getType()->getAddressSpace(); + return getSymbolPrefixForAddressSpace(AS); +} + +void HSAILAsmPrinter::printInitVarWithAddressPragma(StringRef VarName, + uint64_t BaseOffset, + const MCExpr *Expr, + unsigned EltSize, + raw_ostream &O) { + MCValue Val; + bool Res = Expr->evaluateAsRelocatable(Val, nullptr, nullptr); + (void)Res; + assert(Res && "Could not evaluate MCExpr"); + assert(!Val.getSymB() && "Multi-symbol expressions not handled"); + + const MCSymbol &Sym = Val.getSymA()->getSymbol(); + + O << "pragma \"initvarwithaddress:" << VarName << ':' + << BaseOffset // Offset into the destination. + << ':' << EltSize << ':' << getSymbolPrefix(Sym) << Sym.getName() << ':' + << Val.getConstant() // Offset of the symbol being written. + << '\"' << ';' << '\n'; +} + +void HSAILAsmPrinter::printFloat(uint32_t Val, raw_ostream &O) { + O << format("0F%" PRIx32, Val); +} + +void HSAILAsmPrinter::printDouble(uint64_t Val, raw_ostream &O) { + O << format("0F%" PRIx64, Val); +} + +void HSAILAsmPrinter::printConstantFP(const ConstantFP *CFP, raw_ostream &O) { + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + if (CFP->getType()->isFloatTy()) + printFloat(static_cast(Val), O); + else if (CFP->getType()->isDoubleTy()) + printDouble(Val, O); + else + llvm_unreachable("unhandled ConstantFP"); +} + +void HSAILAsmPrinter::printScalarConstant(const Constant *CPV, + SmallVectorImpl &Addrs, + uint64_t &TotalSizeEmitted, + const DataLayout &DL, + raw_ostream &O) { + if (const ConstantInt *CI = dyn_cast(CPV)) { + TotalSizeEmitted += DL.getTypeAllocSize(CI->getType()); + O << CI->getValue(); + return; + } + + if (const ConstantFP *CFP = dyn_cast(CPV)) { + TotalSizeEmitted += DL.getTypeAllocSize(CFP->getType()); + printConstantFP(CFP, O); + return; + } + + if (const ConstantDataSequential *CDS = + dyn_cast(CPV)) { + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) { + if (I > 0) + O << ", "; + + const Constant *Elt = CDS->getElementAsConstant(I); + printScalarConstant(Elt, Addrs, TotalSizeEmitted, DL, O); + } + + return; + } + + if (isa(CPV)) { + TotalSizeEmitted += DL.getTypeAllocSize(CPV->getType()); + O << '0'; + return; + } + + if (const GlobalValue *GV = dyn_cast(CPV)) { + O << '0'; + + auto ME = MCSymbolRefExpr::create(getSymbol(GV), OutContext); + Addrs.push_back(std::make_pair(TotalSizeEmitted, ME)); + TotalSizeEmitted += DL.getTypeAllocSize(GV->getType()); + return; + } + + if (const ConstantExpr *CExpr = dyn_cast(CPV)) { + const MCExpr *ME = lowerConstant(CPV); + O << '0'; + + Addrs.push_back(std::make_pair(TotalSizeEmitted, ME)); + TotalSizeEmitted += DL.getTypeAllocSize(CExpr->getType()); + return; + } + + llvm_unreachable("unhandled scalar constant type"); +} + +void HSAILAsmPrinter::printGVInitialValue(const GlobalValue &GV, + const Constant *CV, + const DataLayout &DL, + raw_ostream &O) { + if (const ConstantInt *CI = dyn_cast(CV)) { + if (CI->getType()->isIntegerTy(1)) + O << (CI->getZExtValue() ? 
'1' : '0') << ';'; + else + O << CI->getValue() << ';'; + return; + } + + if (const ConstantFP *CFP = dyn_cast(CV)) { + printConstantFP(CFP, O); + O << ';'; + return; + } + + unsigned NElts = 1; + Type *EltTy = HSAIL::analyzeType(CV->getType(), NElts, DL); + + unsigned EltSize = DL.getTypeAllocSize(EltTy); + SmallVector AddrInits; + + // Write other cases as byte array. + StoreInitializer store(EltTy, *this); + + store.append(CV, GV.getName()); + + // Make sure this is actually an array. For the special case of a single + // pointer initializer, we don't want the braces. + if (NElts != 0) + O << getArgTypeName(EltTy) << "[]("; + + store.print(O); + + if (NElts != 0) + O << ')'; + + O << ';'; + + if (!store.varInitAddresses().empty()) + O << '\n'; + + for (const auto &VarInit : store.varInitAddresses()) { + char Pre = getSymbolPrefixForAddressSpace(GV.getType()->getAddressSpace()); + SmallString<128> Name; + Name += Pre; + Name += GV.getName(); + + printInitVarWithAddressPragma(Name, VarInit.BaseOffset, VarInit.Expr, + EltSize, O); + } + + O << '\n'; +} + +void HSAILAsmPrinter::getHSAILMangledName(SmallString<256> &NameStr, + const GlobalValue *GV) const { + if (isa(GV)) { + NameStr += '&'; + } else if (const GlobalAlias *GA = dyn_cast(GV)) { + if (isa(GA->getAliasee())) + NameStr += '&'; + else + llvm_unreachable("Not handled"); + } else { + unsigned AS = GV->getType()->getAddressSpace(); + NameStr += getSymbolPrefixForAddressSpace(AS); + } + + SmallString<256> Mangled; + SmallString<256> Sanitized; + + getNameWithPrefix(Mangled, GV); + + NameStr += Mangled; + +#if 0 + // FIXME: We need a way to deal with invalid identifiers, e.g. leading + // period. We can replace them with something here, but need a way to resolve + // possible conflicts. + if (HSAIL::sanitizedGlobalValueName(Mangled, Sanitized)) + NameStr += Sanitized; + else + NameStr += Mangled; +#endif +} + +// FIXME: Mostly duplicated in BRIGAsmPrinter +static void printAlignTypeQualifier(const GlobalValue &GV, const DataLayout &DL, + Type *InitTy, Type *EmitTy, unsigned NElts, + bool IsLocal, raw_ostream &O) { + unsigned Alignment = GV.getAlignment(); + if (Alignment == 0) + Alignment = DL.getPrefTypeAlignment(InitTy); + else { + // If an alignment is specified, it must be equal to or greater than the + // variable's natural alignment. + unsigned NaturalAlign = IsLocal ? 
DL.getPrefTypeAlignment(EmitTy) + : DL.getABITypeAlignment(EmitTy); + + Alignment = std::max(Alignment, NaturalAlign); + } + + // Align arrays at least by 4 bytes + if (Alignment < 4 && NElts != 0) + Alignment = 4; + + if (Alignment != DL.getABITypeAlignment(EmitTy)) + O << "align(" << Alignment << ") "; +} + +void HSAILAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + if (HSAIL::isIgnoredGV(GV)) + return; + + SmallString<256> Name; + getHSAILMangledName(Name, GV); + + SmallString<1024> Str; + raw_svector_ostream O(Str); + const DataLayout &DL = getDataLayout(); + + PointerType *Ty = GV->getType(); + Type *InitTy = Ty->getElementType(); + unsigned AS = Ty->getAddressSpace(); + + if (isProgramLinkage(*GV)) + O << "prog "; + + if (AS != HSAILAS::READONLY_ADDRESS) + O << "alloc(agent) "; + + unsigned NElts = ~0u; + Type *EmitTy = HSAIL::analyzeType(InitTy, NElts, DL); + + printAlignTypeQualifier(*GV, DL, InitTy, EmitTy, NElts, false, O); + + O << getSegmentName(AS) << '_' << getArgTypeName(EmitTy) << ' ' << Name; + + if (NElts != 0) + O << '[' << NElts << ']'; + + // TODO_HSA: if group memory has initializer, then emit instructions to + // initialize dynamically. + if (GV->hasInitializer() && canInitAddressSpace(AS)) { + const Constant *Init = cast(GV->getInitializer()); + + if (isa(Init)) + O << ';'; + else { + O << " = "; + + // Emit trivial zero initializers as a single 0. + if (Init->isNullValue()) { + Type *Ty = Init->getType(); + if (Ty->isAggregateType() || Ty->isVectorTy()) { + O << getArgTypeName(EmitTy) << "[]("; + + // FIXME: Use uint64_t for NElts + for (unsigned I = 0; I < NElts; ++I) { + if (I > 0) + O << ", "; + O << '0'; + } + + O << ')'; + } else + O << '0'; + O << ';'; + } else { + printGVInitialValue(*GV, Init, DL, O); + } + } + } else { + O << ';'; + } + + OutStreamer->EmitRawText(O.str()); +} + +// Check if any defined functions use subtargets that require extensions. +void HSAILAsmPrinter::checkModuleSubtargetExtensions(const Module &M, + bool &IsFullProfile, + bool &IsGCN, + bool &HasImages) const { + IsFullProfile = false; + IsGCN = false; + HasImages = false; + + for (const Function &F : M) { + const HSAILSubtarget &ST = TM.getSubtarget(F); + + if (ST.isFullProfile()) + IsFullProfile = true; + + if (ST.isGCN()) + IsGCN = true; + + if (ST.hasImages()) + HasImages = true; + + // Stop looking if there are no more subtarget extensions to check for, + // which is the most common case. + if (IsFullProfile && IsGCN && HasImages) + break; + } +} + +bool HSAILAsmPrinter::isHSAILInstrinsic(StringRef str) { + if ((HSAILIntrinsic::ID)Intrinsic::not_intrinsic != + getIntrinsicForGCCBuiltin("HSAIL", str.data())) + return true; + return str.startswith(StringRef("llvm.HSAIL.")); +} + +void HSAILAsmPrinter::EmitStartOfAsmFile(Module &M) { + SmallString<32> Str; + raw_svector_ostream O(Str); + + Triple TT(TM.getTargetTriple()); + bool IsLargeModel = (TT.getArch() == Triple::hsail64); + + bool IsFullProfile, IsGCN, HasImages; + checkModuleSubtargetExtensions(M, IsFullProfile, IsGCN, HasImages); + + O << "module &__llvm_hsail_module:" << BRIG_VERSION_HSAIL_MAJOR << ':' + << BRIG_VERSION_HSAIL_MINOR << ':' + << (IsFullProfile ? "$full" : "$base") << ':' + << (IsLargeModel ? 
"$large" : "$small") << ':' + << "$near" // TODO: Get from somewhere + << ";\n\n"; + + if (IsGCN) + O << "extension \"amd:gcn\";\n"; + + if (HasImages) + O << "extension \"IMAGE\";\n"; + + OutStreamer->EmitRawText(O.str()); + + for (const GlobalVariable &GV : M.globals()) { + unsigned AS = GV.getType()->getAddressSpace(); + if (AS != HSAILAS::PRIVATE_ADDRESS && AS != HSAILAS::GROUP_ADDRESS) + EmitGlobalVariable(&GV); + } + + // Emit function declarations, except for kernels or intrinsics. + for (const Function &F : M) { + if (F.isIntrinsic()) + continue; + + if (F.isDeclaration() && isModuleLinkage(F)) + continue; + + if (!HSAIL::isKernelFunc(&F) && !isHSAILInstrinsic(F.getName())) { + Str.clear(); + O.resync(); + + O << "decl "; + + if (isProgramLinkage(F)) + O << "prog "; + + emitFunctionLabel(F, O, true); + O << ";\n\n"; + OutStreamer->EmitRawText(O.str()); + } + } +} + +StringRef HSAILAsmPrinter::getArgTypeName(Type *Ty, bool Signed) const { + switch (Ty->getTypeID()) { + case Type::VoidTyID: + break; + case Type::FloatTyID: + return "f32"; + case Type::DoubleTyID: + return "f64"; + case Type::IntegerTyID: { + switch (Ty->getIntegerBitWidth()) { + case 32: + return Signed ? "s32" : "u32"; + case 64: + return Signed ? "s64" : "u64"; + case 1: + return "b1"; + case 8: + return Signed ? "s8" : "u8"; + case 16: + return Signed ? "s16" : "u16"; + default: + llvm_unreachable("unhandled integer width argument"); + } + } + case Type::PointerTyID: { + const PointerType *PT = cast(Ty); + const StructType *ST = dyn_cast(PT->getElementType()); + if (ST && ST->isOpaque()) { + StringRef Name = ST->getName(); + if (Name.startswith("struct._image1d_t") || + Name.startswith("struct._image1d_array_t") || + Name.startswith("struct._image1d_buffer_t") || + Name.startswith("struct._image2d_t") || + Name.startswith("struct._image2d_array_t") || + Name.startswith("struct._image3d_t")) { + return "_RWImg"; + } else if (Name.startswith("struct._sampler_t")) { + return "_Samp"; + } else if (Name == "struct._counter32_t" || Name == "struct._event_t") { + const DataLayout &DL = getDataLayout(); + return DL.getPointerSize(HSAILAS::GLOBAL_ADDRESS) == 4 ? "u64" : "u32"; + } else { + llvm_unreachable("unhandled struct type argument"); + } + } else { + unsigned AS = PT->getAddressSpace(); + return getDataLayout().getPointerSize(AS) == 4 ? "u32" : "u64"; + } + } + case Type::StructTyID: // Treat struct as array of bytes. + return "u8"; + + case Type::VectorTyID: + case Type::ArrayTyID: { + // Treat as array of elements. + const SequentialType *ST = cast(Ty); + + return getArgTypeName(ST->getElementType()); + } + default: + llvm_unreachable("unhandled argument type id"); + } + + return ""; +} + +void HSAILAsmPrinter::EmitFunctionHeader() { + // Print the 'header' of function. + const Function *F = MF->getFunction(); + + OutStreamer->SwitchSection( + getObjFileLowering().SectionForGlobal(F, *Mang, TM)); + + // Emit the CurrentFnSym. This is a virtual function to allow targets to do + // their wild and crazy things as required. 
+ EmitFunctionEntryLabel(); +} + +void HSAILAsmPrinter::EmitFunctionEntryLabel() { + std::string FunStr; + raw_string_ostream O(FunStr); + + const Function *F = MF->getFunction(); + + if (isProgramLinkage(*F)) + O << "prog "; + emitFunctionLabel(*F, O, false); + O << "\n{"; + + OutStreamer->EmitRawText(O.str()); +} + +void HSAILAsmPrinter::computeStackUsage(const MachineFrameInfo *MFI, + uint64_t &PrivateSize, + unsigned &PrivateAlign, + uint64_t &SpillSize, + unsigned &SpillAlign) const { + SpillSize = 0; + PrivateSize = 0; + PrivateAlign = 4; + SpillAlign = 4; + + // The stack objects have been preprocessed by + // processFunctionBeforeFrameFinalized so that we only expect the last two + // frame objects. + for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd(); + I != E; ++I) { + if (MFI->isDeadObjectIndex(I)) + continue; + + if (MFI->isSpillSlotObjectIndex(I)) { + assert(SpillSize == 0 && "Only one spill object should be seen"); + + SpillSize = MFI->getObjectSize(I); + SpillAlign = MFI->getObjectAlignment(I); + } else { + assert(PrivateSize == 0 && "Only one private object should be seen"); + + PrivateSize = MFI->getObjectSize(I); + PrivateAlign = MFI->getObjectAlignment(I); + } + } +} + +void HSAILAsmPrinter::EmitFunctionBodyStart() { + std::string FunStr; + raw_string_ostream O(FunStr); + + const DataLayout &DL = getDataLayout(); + +#if 0 + if (isKernelFunc(*F)) { // Emitting block data inside of kernel. + uint32_t id = 0; + mMeta->setID(id); + mMeta->setKernel(true); + ++mBuffer; + if (isKernel) { + mMeta->printHeader(mKernelName); + if (isOpenCLKernel) + mMeta->processArgMetadata(O, mBuffer, isKernel); + mMeta->printMetaData(O, id, isKernel); + } + } +#endif + + SmallPtrSet FuncPvtVarsSet; + SmallPtrSet FuncGrpVarsSet; + for (const MachineBasicBlock &MBB : *MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isGlobal()) + continue; + + const GlobalVariable *GV = dyn_cast(MO.getGlobal()); + if (!GV) + continue; + + unsigned AS = GV->getType()->getAddressSpace(); + if (AS == HSAILAS::PRIVATE_ADDRESS) + FuncPvtVarsSet.insert(GV); + + if (AS == HSAILAS::GROUP_ADDRESS) + FuncGrpVarsSet.insert(GV); + } + } + } + + // Emit group variable declarations. + const Module *M = MF->getMMI().getModule(); + for (const GlobalVariable &GV : M->globals()) { + PointerType *Ty = GV.getType(); + unsigned AS = Ty->getAddressSpace(); + if (AS == HSAILAS::GROUP_ADDRESS) { + if (FuncGrpVarsSet.count(&GV)) { + std::string str; + O << '\t'; + + Type *InitTy = Ty->getElementType(); + + unsigned NElts = ~0u; + Type *EmitTy = HSAIL::analyzeType(InitTy, NElts, DL); + printAlignTypeQualifier(GV, DL, InitTy, EmitTy, NElts, true, O); + + O << getSegmentName(AS) << '_' << getArgTypeName(EmitTy) << " %" + << GV.getName(); + + if (NElts != 0) + O << '[' << NElts << ']'; + + O << ";\n"; + } + } + } + + O << '\n'; + + // Emit private variable declarations. + for (const GlobalVariable &GV : M->globals()) { + PointerType *Ty = GV.getType(); + unsigned AS = Ty->getAddressSpace(); + if (AS == HSAILAS::PRIVATE_ADDRESS) { + if (FuncPvtVarsSet.count(&GV)) { + StringRef GVname = GV.getName(); + bool ChangeName = false; + SmallVector NameParts; + const char *tmp_opt_name = "tmp_opt_var"; + std::string str; + if (GVname.empty()) { + str = tmp_opt_name; + ChangeName = true; + } else if (!isalpha(GVname[0]) && GVname[0] != '_') { + str = tmp_opt_name; + str.append(GVname); + ChangeName = true; + } + + { // replace all '.' 
with '_' + size_t pos = str.find('.'); + if (pos != std::string::npos) + ChangeName = true; + + while (pos != std::string::npos) { + str.replace(pos++, 1, "_"); + pos = str.find('.', pos); + } + } + + if (ChangeName) { + // FIXME + (const_cast(&GV))->setName(str); + } + + O << '\t'; + + Type *InitTy = Ty->getElementType(); + + unsigned NElts = ~0u; + Type *EmitTy = HSAIL::analyzeType(InitTy, NElts, DL); + + printAlignTypeQualifier(GV, DL, InitTy, EmitTy, NElts, true, O); + str = ""; + + O << '_' << getArgTypeName(EmitTy) << " %" << GV.getName(); + if (NElts != 0) + O << '[' << NElts << ']'; + + if (GV.hasInitializer() && canInitAddressSpace(AS)) { + O << " = "; + printGVInitialValue(GV, cast(GV.getInitializer()), DL, O); + } + } + } + } + + const MachineFrameInfo *MFI = MF->getFrameInfo(); + + uint64_t SpillSize, PrivateSize; + unsigned PrivateAlign, SpillAlign; + computeStackUsage(MFI, PrivateSize, PrivateAlign, SpillSize, SpillAlign); + + if (PrivateSize != 0) { + O << "\talign(" << PrivateAlign + << ") private_u8 %__privateStack[" << PrivateSize << "];\n"; + } + + if (SpillSize != 0) { + O << "\talign(" << SpillAlign + << ") spill_u8 %__spillStack[" << SpillSize << "];\n"; + } + + const HSAILMachineFunctionInfo *Info = MF->getInfo(); + if (Info->hasScavengerSpill()) + O << "\tspill_u32 %___spillScavenge;"; + + +#if 0 + // Allocate gcn region for gcn atomic counter, if required. + if (usesGCNAtomicCounter()) + O << "\tgcn_region_alloc 4;\n"; +#endif + + OutStreamer->EmitRawText(O.str()); +} + +void HSAILAsmPrinter::EmitFunctionBodyEnd() { OutStreamer->EmitRawText("};"); } + +void HSAILAsmPrinter::EmitInstruction(const MachineInstr *MI) { + HSAILMCInstLower MCInstLowering(OutContext, *this); + + MCInst TmpInst; + MCInstLowering.lower(MI, TmpInst); + EmitToStreamer(*OutStreamer, TmpInst); +} Index: lib/Target/HSAIL/HSAILAtomics.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILAtomics.td @@ -0,0 +1,114 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// atomics + +let mayLoad = 1, mayStore = 1, hasSideEffects = 1, HasDefaultSegment = 1 in { + def GCN_ATOMIC_APPEND_U32 : HSAILInstAddr_1Op<"gcn_atomic_append", BrigOpcode.GCNAPPEND, Inst_U32_U32>; + def GCN_ATOMIC_CONSUME_U32 : HSAILInstAddr_1Op<"gcn_atomic_consume", BrigOpcode.GCNCONSUME, Inst_U32_U32>; +} + +def : InstAddr_1Op_Pat< + GCN_ATOMIC_APPEND_U32, + int_HSAIL_gcn_atomic_append_u32, + BrigType.U32, + AddressSpace.REGION +>; + +def : InstAddr_1Op_Pat< + GCN_ATOMIC_CONSUME_U32, + int_HSAIL_gcn_atomic_consume_u32, + BrigType.U32, + AddressSpace.REGION +>; + +// We must define a new opcode for each atomic operation because we +// can't change the properties of the instruction based on the +// operation modifier operand. e.g. mayLoad must always be the same +// for different instances of the instruction. 
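The comment above explains the TableGen-side constraint: flags such as mayLoad and mayStore live in the static MCInstrDesc, so a value-returning atomic and its no-return twin have to be distinct opcodes rather than variants chosen by a modifier operand. The definitions that follow pair each ATOMIC_* opcode with an ATOMICNORET_* opcode and set hasPostISelHook so the rewrite to the no-return form can happen after selection when the result is unused. The hook itself is not part of this hunk; the sketch below (LLVM 3.7-era C++ API, with the opcode-mapping helper assumed rather than taken from the patch) only shows the shape of such a rewrite.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"

using namespace llvm;

// Sketch only (not code from this patch): rewrite a value-returning atomic
// into its atomicnoret twin when nothing reads the result. In-tree, this kind
// of rewrite would run from TargetLowering::AdjustInstrPostInstrSelection,
// which is what the hasPostISelHook setting below enables.
static void replaceDeadAtomicWithNoRet(MachineInstr *MI) {
  const MachineFunction &MF = *MI->getParent()->getParent();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Dst = MI->getOperand(0).getReg();
  if (!MRI.use_empty(Dst))
    return; // The loaded value is used; keep the ATOMIC_* form.

  // Hypothetical mapping helper (ATOMIC_* -> ATOMICNORET_*), not defined
  // anywhere in this hunk; returns -1 when there is no no-return twin.
  int NoRetOpc = HSAIL::getAtomicNoRetVersion(MI->getOpcode());
  if (NoRetOpc == -1)
    return;

  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MI->setDesc(TII.get(NoRetOpc));
  MI->RemoveOperand(0); // Drop the now-dead destination register operand.
}

As the comment further below notes, the ATOMICNORET definitions exist purely as the target of this rewrite; nothing selects them directly.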
+multiclass InstAtomicOp brigVal> { + defm _ADD : InstAtomic_1Op_IntTypes; + defm _AND : InstAtomic_1Op_BitTypes; + defm _CAS : InstAtomic_2Op_BitTypes; + defm _EXCH : InstAtomic_1Op_BitTypes; + + let mayStore = 0 in { + defm _LD : InstAtomic_0Op_BitTypes; + } + + defm _MAX : InstAtomic_1Op_IntTypes; + defm _MIN : InstAtomic_1Op_IntTypes; + defm _OR : InstAtomic_1Op_BitTypes; + + defm _SUB : InstAtomic_1Op_IntTypes; + defm _WRAPDEC : InstAtomic_1Op_IntTypes; + defm _WRAPINC : InstAtomic_1Op_IntTypes; + defm _XOR : InstAtomic_1Op_BitTypes; +} + +let hasPostISelHook = 1 in { + defm ATOMIC : InstAtomicOp<"atomic", BrigOpcode.ATOMIC>; +} + +multiclass InstAtomicOp_NoRet brigVal> { + defm _ADD : InstAtomic_1Op_NoRet_IntTypes; + defm _AND : InstAtomic_1Op_NoRet_BitTypes; + defm _EXCH : InstAtomic_1Op_NoRet_BitTypes; + defm _MAX : InstAtomic_1Op_NoRet_IntTypes; + defm _MIN : InstAtomic_1Op_NoRet_IntTypes; + defm _OR : InstAtomic_1Op_NoRet_BitTypes; + + let mayLoad = 0 in { + defm _ST : InstAtomic_1Op_NoRet_BitTypes; + } + + defm _SUB : InstAtomic_1Op_NoRet_IntTypes; + defm _WRAPDEC : InstAtomic_1Op_NoRet_IntTypes; + defm _WRAPINC : InstAtomic_1Op_NoRet_IntTypes; + defm _XOR : InstAtomic_1Op_NoRet_BitTypes; +} + + +// atomic is replaced by atomicnoret in the post-isel hook if there +// are no uses of the returned value. +defm ATOMICNORET : InstAtomicOp_NoRet<"atomicnoret", BrigOpcode.ATOMICNORET>; + + +// def atomic_load_inc : PatFrag< +// (ops node:$ptr), +// (atomic_load_add node:$ptr, 1) +// >; + +// def atomic_load_dec : PatFrag< +// (ops node:$ptr), +// (atomic_load_sub node:$ptr, 1) +// >; + + +// let AddedComplexity = 1 in { +// defm : AtomicPat_0Op_IntTypes; +// defm : AtomicPat_0Op_IntTypes; +// } + +defm : AtomicPat_0Op_BitTypes<"ATOMIC_LD", atomic_load>; +defm : AtomicPat_1Op_NoRet_BitTypes<"ATOMICNORET_ST", atomic_store>; + +defm : AtomicPat_1Op_BitTypes<"ATOMIC_EXCH", atomic_swap>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_ADD", atomic_load_add, 1>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_SUB", atomic_load_sub, 1>; +defm : AtomicPat_1Op_BitTypes<"ATOMIC_AND", atomic_load_and>; +defm : AtomicPat_1Op_BitTypes<"ATOMIC_OR", atomic_load_or>; +defm : AtomicPat_1Op_BitTypes<"ATOMIC_XOR", atomic_load_xor>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_MIN", atomic_load_min, 1>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_MAX", atomic_load_max, 1>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_MIN", atomic_load_umin, 0>; +defm : AtomicPat_1Op_IntTypes<"ATOMIC_MAX", atomic_load_umax, 0>; + +defm : AtomicPat_2Op_BitTypes<"ATOMIC_CAS", atomic_cmp_swap>; Index: lib/Target/HSAIL/HSAILBrig.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILBrig.h @@ -0,0 +1,16 @@ +//===-- HSAILBrig.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILBRIG_H +#define LLVM_LIB_TARGET_HSAIL_HSAILBRIG_H + +#include "llvm/Support/DataTypes.h" +#include "libHSAIL/Brig.h" + +#endif Index: lib/Target/HSAIL/HSAILBrigDefs.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILBrigDefs.h @@ -0,0 +1,1130 @@ +//===-- BRIGEnums.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILBRIGDEFS_H +#define LLVM_LIB_TARGET_HSAIL_HSAILBRIGDEFS_H + +#if HSAIL_USE_LIBHSAIL +#include "HSAILBrig.h" +#else + +#include + +typedef uint32_t BrigVersion32_t; + +enum BrigVersion { + + //.nowrap + //.nodump + //.nollvm + + BRIG_VERSION_HSAIL_MAJOR = 1, + BRIG_VERSION_HSAIL_MINOR = 0, + BRIG_VERSION_BRIG_MAJOR = 1, + BRIG_VERSION_BRIG_MINOR = 0 +}; + +typedef uint8_t BrigAlignment8_t; //.defValue=BRIG_ALIGNMENT_NONE + +typedef uint8_t BrigAllocation8_t; //.defValue=BRIG_ALLOCATION_NONE + +typedef uint8_t BrigAluModifier8_t; + +typedef uint8_t BrigAtomicOperation8_t; + +typedef uint32_t BrigCodeOffset32_t; //.defValue=0 //.wtype=ItemRef + +typedef uint8_t BrigCompareOperation8_t; + +typedef uint16_t BrigControlDirective16_t; + +typedef uint32_t BrigDataOffset32_t; + +typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t; //.wtype=ListRef //.defValue=0 + +typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t; //.wtype=ListRef //.defValue=0 + +typedef BrigDataOffset32_t BrigDataOffsetString32_t; //.wtype=StrRef //.defValue=0 + +typedef uint8_t BrigExecutableModifier8_t; + +typedef uint8_t BrigImageChannelOrder8_t; //.defValue=BRIG_CHANNEL_ORDER_UNKNOWN + +typedef uint8_t BrigImageChannelType8_t; //.defValue=BRIG_CHANNEL_TYPE_UNKNOWN + +typedef uint8_t BrigImageGeometry8_t; //.defValue=BRIG_GEOMETRY_UNKNOWN + +typedef uint8_t BrigImageQuery8_t; + +typedef uint16_t BrigKind16_t; + +typedef uint8_t BrigLinkage8_t; //.defValue=BRIG_LINKAGE_NONE + +typedef uint8_t BrigMachineModel8_t; //.defValue=BRIG_MACHINE_LARGE + +typedef uint8_t BrigMemoryModifier8_t; + +typedef uint8_t BrigMemoryOrder8_t; //.defValue=BRIG_MEMORY_ORDER_RELAXED + +typedef uint8_t BrigMemoryScope8_t; //.defValue=BRIG_MEMORY_SCOPE_SYSTEM + +typedef uint16_t BrigOpcode16_t; + +typedef uint32_t BrigOperandOffset32_t; //.defValue=0 //.wtype=ItemRef + +typedef uint8_t BrigPack8_t; //.defValue=BRIG_PACK_NONE + +typedef uint8_t BrigProfile8_t; //.defValue=BRIG_PROFILE_FULL + +typedef uint16_t BrigRegisterKind16_t; + +typedef uint8_t BrigRound8_t; //.defValue=BRIG_ROUND_NONE + +typedef uint8_t BrigSamplerAddressing8_t; //.defValue=BRIG_ADDRESSING_CLAMP_TO_EDGE + +typedef uint8_t BrigSamplerCoordNormalization8_t; + +typedef uint8_t BrigSamplerFilter8_t; + +typedef uint8_t BrigSamplerQuery8_t; + +typedef uint32_t BrigSectionIndex32_t; + +typedef uint8_t BrigSegCvtModifier8_t; + +typedef uint8_t BrigSegment8_t; //.defValue=BRIG_SEGMENT_NONE + +typedef uint32_t BrigStringOffset32_t; //.defValue=0 //.wtype=StrRef + +typedef uint16_t BrigType16_t; + +typedef uint8_t BrigVariableModifier8_t; + +typedef uint8_t BrigWidth8_t; + +typedef uint32_t BrigExceptions32_t; + +enum BrigKind { + + //.nollvm + // + //.wname={ s/^BRIG_KIND//; MACRO2Name($_) } + 
//.mnemo=$wname{ $wname } + // + //.sizeof=$wname{ "sizeof(".$structs->{"Brig".$wname}->{rawbrig}.")" } + //.sizeof_switch //.sizeof_proto="int size_of_brig_record(unsigned arg)" //.sizeof_default="return -1" + // + //.isBodyOnly={ "false" } + //.isBodyOnly_switch //.isBodyOnly_proto="bool isBodyOnly(Directive d)" //.isBodyOnly_arg="d.kind()" + //.isBodyOnly_default="assert(false); return false" + // + //.isToplevelOnly={ "false" } + //.isToplevelOnly_switch //.isToplevelOnly_proto="bool isToplevelOnly(Directive d)" //.isToplevelOnly_arg="d.kind()" + //.isToplevelOnly_default="assert(false); return false" + + BRIG_KIND_NONE = 0x0000, //.skip + + BRIG_KIND_DIRECTIVE_BEGIN = 0x1000, //.skip + BRIG_KIND_DIRECTIVE_ARG_BLOCK_END = 0x1000, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_ARG_BLOCK_START = 0x1001, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_COMMENT = 0x1002, + BRIG_KIND_DIRECTIVE_CONTROL = 0x1003, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_EXTENSION = 0x1004, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_FBARRIER = 0x1005, + BRIG_KIND_DIRECTIVE_FUNCTION = 0x1006, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION = 0x1007, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_KERNEL = 0x1008, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_LABEL = 0x1009, //.isBodyOnly=true + BRIG_KIND_DIRECTIVE_LOC = 0x100a, + BRIG_KIND_DIRECTIVE_MODULE = 0x100b, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_PRAGMA = 0x100c, + BRIG_KIND_DIRECTIVE_SIGNATURE = 0x100d, //.isToplevelOnly=true + BRIG_KIND_DIRECTIVE_VARIABLE = 0x100e, + BRIG_KIND_DIRECTIVE_END = 0x100f, //.skip + + BRIG_KIND_INST_BEGIN = 0x2000, //.skip + BRIG_KIND_INST_ADDR = 0x2000, + BRIG_KIND_INST_ATOMIC = 0x2001, + BRIG_KIND_INST_BASIC = 0x2002, + BRIG_KIND_INST_BR = 0x2003, + BRIG_KIND_INST_CMP = 0x2004, + BRIG_KIND_INST_CVT = 0x2005, + BRIG_KIND_INST_IMAGE = 0x2006, + BRIG_KIND_INST_LANE = 0x2007, + BRIG_KIND_INST_MEM = 0x2008, + BRIG_KIND_INST_MEM_FENCE = 0x2009, + BRIG_KIND_INST_MOD = 0x200a, + BRIG_KIND_INST_QUERY_IMAGE = 0x200b, + BRIG_KIND_INST_QUERY_SAMPLER = 0x200c, + BRIG_KIND_INST_QUEUE = 0x200d, + BRIG_KIND_INST_SEG = 0x200e, + BRIG_KIND_INST_SEG_CVT = 0x200f, + BRIG_KIND_INST_SIGNAL = 0x2010, + BRIG_KIND_INST_SOURCE_TYPE = 0x2011, + BRIG_KIND_INST_END = 0x2012, //.skip + + BRIG_KIND_OPERAND_BEGIN = 0x3000, //.skip + BRIG_KIND_OPERAND_ADDRESS = 0x3000, + BRIG_KIND_OPERAND_ALIGN = 0x3001, + BRIG_KIND_OPERAND_CODE_LIST = 0x3002, + BRIG_KIND_OPERAND_CODE_REF = 0x3003, + BRIG_KIND_OPERAND_CONSTANT_BYTES = 0x3004, + BRIG_KIND_OPERAND_RESERVED = 0x3005, //.skip + BRIG_KIND_OPERAND_CONSTANT_IMAGE = 0x3006, + BRIG_KIND_OPERAND_CONSTANT_OPERAND_LIST = 0x3007, + BRIG_KIND_OPERAND_CONSTANT_SAMPLER = 0x3008, + BRIG_KIND_OPERAND_OPERAND_LIST = 0x3009, + BRIG_KIND_OPERAND_REGISTER = 0x300a, + BRIG_KIND_OPERAND_STRING = 0x300b, + BRIG_KIND_OPERAND_WAVESIZE = 0x300c, + BRIG_KIND_OPERAND_END = 0x300d //.skip +}; + +enum BrigAlignment { + + //.mnemo={ s/^BRIG_ALIGNMENT_//; lc } + //.mnemo_proto="const char* align2str(unsigned arg)" + // + //.bytes={ /(\d+)/ ? $1 : undef } + //.bytes_switch //.bytes_proto="unsigned align2num(unsigned arg)" //.bytes_default="assert(false); return -1" + // + //.rbytes=$bytes{ $bytes } + //.rbytes_switch //.rbytes_reverse //.rbytes_proto="BrigAlignment num2align(uint64_t arg)" + //.rbytes_default="return BRIG_ALIGNMENT_LAST" + // + //.print=$bytes{ $bytes>1 ? 
"_align($bytes)" : "" } + + BRIG_ALIGNMENT_NONE = 0, //.no_mnemo + BRIG_ALIGNMENT_1 = 1, //.mnemo="" + BRIG_ALIGNMENT_2 = 2, + BRIG_ALIGNMENT_4 = 3, + BRIG_ALIGNMENT_8 = 4, + BRIG_ALIGNMENT_16 = 5, + BRIG_ALIGNMENT_32 = 6, + BRIG_ALIGNMENT_64 = 7, + BRIG_ALIGNMENT_128 = 8, + BRIG_ALIGNMENT_256 = 9, + + BRIG_ALIGNMENT_LAST, //.skip + BRIG_ALIGNMENT_MAX = BRIG_ALIGNMENT_LAST - 1 //.skip +}; + +enum BrigAllocation { + + //.mnemo={ s/^BRIG_ALLOCATION_//;lc } + //.mnemo_token=EAllocKind + + BRIG_ALLOCATION_NONE = 0, //.mnemo="" + BRIG_ALLOCATION_PROGRAM = 1, + BRIG_ALLOCATION_AGENT = 2, + BRIG_ALLOCATION_AUTOMATIC = 3 +}; + +enum BrigAluModifierMask { + BRIG_ALU_FTZ = 1 +}; + +enum BrigAtomicOperation { + + //.tdcaption="Atomic Operations" + // + //.mnemo={ s/^BRIG_ATOMIC_//;lc } + //.mnemo_token=_EMAtomicOp + //.mnemo_context=EInstModifierInstAtomicContext + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_ATOMIC_ADD = 0, + BRIG_ATOMIC_AND = 1, + BRIG_ATOMIC_CAS = 2, + BRIG_ATOMIC_EXCH = 3, + BRIG_ATOMIC_LD = 4, + BRIG_ATOMIC_MAX = 5, + BRIG_ATOMIC_MIN = 6, + BRIG_ATOMIC_OR = 7, + BRIG_ATOMIC_ST = 8, + BRIG_ATOMIC_SUB = 9, + BRIG_ATOMIC_WRAPDEC = 10, + BRIG_ATOMIC_WRAPINC = 11, + BRIG_ATOMIC_XOR = 12, + BRIG_ATOMIC_WAIT_EQ = 13, + BRIG_ATOMIC_WAIT_NE = 14, + BRIG_ATOMIC_WAIT_LT = 15, + BRIG_ATOMIC_WAIT_GTE = 16, + BRIG_ATOMIC_WAITTIMEOUT_EQ = 17, + BRIG_ATOMIC_WAITTIMEOUT_NE = 18, + BRIG_ATOMIC_WAITTIMEOUT_LT = 19, + BRIG_ATOMIC_WAITTIMEOUT_GTE = 20 +}; + +enum BrigCompareOperation { + + //.tdcaption="Comparison Operators" + // + //.mnemo={ s/^BRIG_COMPARE_//;lc } + //.mnemo_token=_EMCompare + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_COMPARE_EQ = 0, + BRIG_COMPARE_NE = 1, + BRIG_COMPARE_LT = 2, + BRIG_COMPARE_LE = 3, + BRIG_COMPARE_GT = 4, + BRIG_COMPARE_GE = 5, + BRIG_COMPARE_EQU = 6, + BRIG_COMPARE_NEU = 7, + BRIG_COMPARE_LTU = 8, + BRIG_COMPARE_LEU = 9, + BRIG_COMPARE_GTU = 10, + BRIG_COMPARE_GEU = 11, + BRIG_COMPARE_NUM = 12, + BRIG_COMPARE_NAN = 13, + BRIG_COMPARE_SEQ = 14, + BRIG_COMPARE_SNE = 15, + BRIG_COMPARE_SLT = 16, + BRIG_COMPARE_SLE = 17, + BRIG_COMPARE_SGT = 18, + BRIG_COMPARE_SGE = 19, + BRIG_COMPARE_SGEU = 20, + BRIG_COMPARE_SEQU = 21, + BRIG_COMPARE_SNEU = 22, + BRIG_COMPARE_SLTU = 23, + BRIG_COMPARE_SLEU = 24, + BRIG_COMPARE_SNUM = 25, + BRIG_COMPARE_SNAN = 26, + BRIG_COMPARE_SGTU = 27 +}; + +enum BrigControlDirective { + + //.mnemo={ s/^BRIG_CONTROL_//;lc } + //.mnemo_token=EControl + // + //.print=$mnemo{ $mnemo } + + BRIG_CONTROL_NONE = 0, //.skip + BRIG_CONTROL_ENABLEBREAKEXCEPTIONS = 1, + BRIG_CONTROL_ENABLEDETECTEXCEPTIONS = 2, + BRIG_CONTROL_MAXDYNAMICGROUPSIZE = 3, + BRIG_CONTROL_MAXFLATGRIDSIZE = 4, + BRIG_CONTROL_MAXFLATWORKGROUPSIZE = 5, + BRIG_CONTROL_REQUIREDDIM = 6, + BRIG_CONTROL_REQUIREDGRIDSIZE = 7, + BRIG_CONTROL_REQUIREDWORKGROUPSIZE = 8, + BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS = 9 +}; + +enum BrigExecutableModifierMask { + //.nodump + BRIG_EXECUTABLE_DEFINITION = 1 +}; + +enum BrigImageChannelOrder { + + //.mnemo={ s/^BRIG_CHANNEL_ORDER_?//;lc } + //.mnemo_token=EImageOrder + //.mnemo_context=EImageOrderContext + // + //.print=$mnemo{ $mnemo } + + BRIG_CHANNEL_ORDER_A = 0, + BRIG_CHANNEL_ORDER_R = 1, + BRIG_CHANNEL_ORDER_RX = 2, + BRIG_CHANNEL_ORDER_RG = 3, + BRIG_CHANNEL_ORDER_RGX = 4, + BRIG_CHANNEL_ORDER_RA = 5, + BRIG_CHANNEL_ORDER_RGB = 6, + BRIG_CHANNEL_ORDER_RGBX = 7, + BRIG_CHANNEL_ORDER_RGBA = 8, + BRIG_CHANNEL_ORDER_BGRA = 9, + BRIG_CHANNEL_ORDER_ARGB = 10, + BRIG_CHANNEL_ORDER_ABGR = 11, + BRIG_CHANNEL_ORDER_SRGB = 12, + BRIG_CHANNEL_ORDER_SRGBX 
= 13, + BRIG_CHANNEL_ORDER_SRGBA = 14, + BRIG_CHANNEL_ORDER_SBGRA = 15, + BRIG_CHANNEL_ORDER_INTENSITY = 16, + BRIG_CHANNEL_ORDER_LUMINANCE = 17, + BRIG_CHANNEL_ORDER_DEPTH = 18, + BRIG_CHANNEL_ORDER_DEPTH_STENCIL = 19, + + // used internally + BRIG_CHANNEL_ORDER_UNKNOWN, //.mnemo="" // used when no order is specified + + BRIG_CHANNEL_ORDER_FIRST_USER_DEFINED = 128 //.skip + +}; + +enum BrigImageChannelType { + + //.mnemo={ s/^BRIG_CHANNEL_TYPE_//;lc } + //.mnemo_token=EImageFormat + // + //.print=$mnemo{ $mnemo } + + BRIG_CHANNEL_TYPE_SNORM_INT8 = 0, + BRIG_CHANNEL_TYPE_SNORM_INT16 = 1, + BRIG_CHANNEL_TYPE_UNORM_INT8 = 2, + BRIG_CHANNEL_TYPE_UNORM_INT16 = 3, + BRIG_CHANNEL_TYPE_UNORM_INT24 = 4, + BRIG_CHANNEL_TYPE_UNORM_SHORT_555 = 5, + BRIG_CHANNEL_TYPE_UNORM_SHORT_565 = 6, + BRIG_CHANNEL_TYPE_UNORM_INT_101010 = 7, + BRIG_CHANNEL_TYPE_SIGNED_INT8 = 8, + BRIG_CHANNEL_TYPE_SIGNED_INT16 = 9, + BRIG_CHANNEL_TYPE_SIGNED_INT32 = 10, + BRIG_CHANNEL_TYPE_UNSIGNED_INT8 = 11, + BRIG_CHANNEL_TYPE_UNSIGNED_INT16 = 12, + BRIG_CHANNEL_TYPE_UNSIGNED_INT32 = 13, + BRIG_CHANNEL_TYPE_HALF_FLOAT = 14, + BRIG_CHANNEL_TYPE_FLOAT = 15, + + // used internally + BRIG_CHANNEL_TYPE_UNKNOWN, //.mnemo="" + + BRIG_CHANNEL_TYPE_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigImageGeometry { + + //.tdcaption="Geometry" + // + //.mnemo={ s/^BRIG_GEOMETRY_//;lc } + //.mnemo_token=EImageGeometry + // + //.dim={/_([0-9]+D)(A)?/ ? $1+(defined $2?1:0) : undef} + //.dim_switch //.dim_proto="unsigned getBrigGeometryDim(unsigned geo)" //.dim_arg="geo" + //.dim_default="assert(0); return 0" + // + //.depth={/DEPTH$/?"true":"false"} + //.depth_switch //.depth_proto="bool isBrigGeometryDepth(unsigned geo)" //.depth_arg="geo" + //.depth_default="return false" + + BRIG_GEOMETRY_1D = 0, + BRIG_GEOMETRY_2D = 1, + BRIG_GEOMETRY_3D = 2, + BRIG_GEOMETRY_1DA = 3, + BRIG_GEOMETRY_2DA = 4, + BRIG_GEOMETRY_1DB = 5, + BRIG_GEOMETRY_2DDEPTH = 6, + BRIG_GEOMETRY_2DADEPTH = 7, + + // used internally + BRIG_GEOMETRY_UNKNOWN, //.mnemo="" + + BRIG_GEOMETRY_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigImageQuery { + + //.mnemo={ s/^BRIG_IMAGE_QUERY_//;lc } + // + //.print=$mnemo{ $mnemo } + + BRIG_IMAGE_QUERY_WIDTH = 0, + BRIG_IMAGE_QUERY_HEIGHT = 1, + BRIG_IMAGE_QUERY_DEPTH = 2, + BRIG_IMAGE_QUERY_ARRAY = 3, + BRIG_IMAGE_QUERY_CHANNELORDER = 4, + BRIG_IMAGE_QUERY_CHANNELTYPE = 5 +}; + +enum BrigLinkage { + + //.mnemo={ s/^BRIG_LINKAGE_//;s/NONE//;lc } + + BRIG_LINKAGE_NONE = 0, + BRIG_LINKAGE_PROGRAM = 1, + BRIG_LINKAGE_MODULE = 2, + BRIG_LINKAGE_FUNCTION = 3, + BRIG_LINKAGE_ARG = 4 +}; + +enum BrigMachineModel { + + //.mnemo={ s/^BRIG_MACHINE_//; '$'.lc } + //.mnemo_token=ETargetMachine + // + //.print=$mnemo{ $mnemo } + + BRIG_MACHINE_SMALL = 0, + BRIG_MACHINE_LARGE = 1, + + BRIG_MACHINE_UNDEF = 2 //.skip +}; + +enum BrigMemoryModifierMask { //.tddef=0 + BRIG_MEMORY_CONST = 1 +}; + +enum BrigMemoryOrder { + + //.mnemo={ s/^BRIG_MEMORY_ORDER_//; lc } + //.mnemo_token=_EMMemoryOrder + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_MEMORY_ORDER_NONE = 0, //.mnemo="" + BRIG_MEMORY_ORDER_RELAXED = 1, //.mnemo=rlx + BRIG_MEMORY_ORDER_SC_ACQUIRE = 2, //.mnemo=scacq + BRIG_MEMORY_ORDER_SC_RELEASE = 3, //.mnemo=screl + BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE = 4, //.mnemo=scar + + BRIG_MEMORY_ORDER_LAST = 5 //.skip +}; + +enum BrigMemoryScope { + + //.mnemo={ s/^BRIG_MEMORY_SCOPE_//; lc } + //.mnemo_token=_EMMemoryScope + // + //.print=$mnemo{ $mnemo } + + BRIG_MEMORY_SCOPE_NONE = 0, //.mnemo="" + BRIG_MEMORY_SCOPE_WORKITEM = 1, //.mnemo="" + 
BRIG_MEMORY_SCOPE_WAVEFRONT = 2, //.mnemo=wave + BRIG_MEMORY_SCOPE_WORKGROUP = 3, //.mnemo=wg + BRIG_MEMORY_SCOPE_AGENT = 4, //.mnemo=agent + BRIG_MEMORY_SCOPE_SYSTEM = 5, //.mnemo=system + + BRIG_MEMORY_SCOPE_LAST = 6 //.skip +}; + +enum BrigOpcode { + + //.tdcaption="Instruction Opcodes" + // + //.k={ "BASIC" } + //.pscode=$k{ MACRO2Name("_".$k) } + //.opcodeparser=$pscode{ return $pscode && "parseMnemo$pscode" } + //.opcodeparser_incfile=ParserUtilities + //.opcodeparser_switch //.opcodeparser_proto="OpcodeParser getOpcodeParser(BrigOpcode16_t arg)" //.opcodeparser_default="return parseMnemoBasic" + // + //.psopnd={undef} + //.opndparser=$psopnd{ return $psopnd && "&Parser::parse$psopnd" } + //.opndparser_incfile=ParserUtilities + //.opndparser_switch //.opndparser_proto="Parser::OperandParser Parser::getOperandParser(BrigOpcode16_t arg)" //.opndparser_default="return &Parser::parseOperands" + // + //.mnemo={ s/^BRIG_OPCODE_//; s/GCN([^_])/GCN_$1/; lc } + //.mnemo_scanner=Instructions //.mnemo_token=EInstruction + //.mnemo_context=EDefaultContext + // + //.has_memory_order={undef} + //.semsupport=$has_memory_order{ return $has_memory_order && "true" } + // + //.hasType=$k{ return ($k and $k eq "BASIC_NO_TYPE") ? "false" : undef; } + //.hasType_switch //.hasType_proto="bool instHasType(BrigOpcode16_t arg)" //.hasType_default="return true" + // + //.opcodevis=$pscode{ s/^BRIG_OPCODE_//; sprintf("%-47s(","vis.visitOpcode_".$_) . ($pscode =~m/^(BasicOrMod|Nop)$/? "inst" : "HSAIL_ASM::Inst". ($pscode=~m/BasicNoType/? "Basic":$pscode) ."(inst)").")" } + //.opcodevis_switch //.opcodevis_proto="template RetType visitOpcode_gen(HSAIL_ASM::Inst inst, Visitor& vis)" + //.opcodevis_arg="inst.opcode()" //.opcodevis_default="return RetType()" + //.opcodevis_incfile=ItemUtils + // + //.ftz=$k{ return ($k eq "BASIC_OR_MOD" or $k eq "CMP" or $k eq "CVT") ? 
"true" : undef } + //.ftz_incfile=ItemUtils //.ftz_switch //.ftz_proto="inline bool instSupportsFtz(BrigOpcode16_t arg)" //.ftz_default="return false" + // + //.vecOpndIndex={undef} + //.vecOpndIndex_switch //.vecOpndIndex_proto="int vecOpndIndex(BrigOpcode16_t arg)" //.vecOpndIndex_default="return -1" + //.vecOpndIndex_incfile=ParserUtilities + // + //.numdst={undef} + //.numdst_switch //.numdst_proto="int instNumDstOperands(BrigOpcode16_t arg)" //.numdst_default="return 1" + // + //.print=$mnemo{ $mnemo } + + BRIG_OPCODE_NOP = 0, //.k=NOP //.hasType=false + BRIG_OPCODE_ABS = 1, //.k=BASIC_OR_MOD + BRIG_OPCODE_ADD = 2, //.k=BASIC_OR_MOD + BRIG_OPCODE_BORROW = 3, + BRIG_OPCODE_CARRY = 4, + BRIG_OPCODE_CEIL = 5, //.k=BASIC_OR_MOD + BRIG_OPCODE_COPYSIGN = 6, //.k=BASIC_OR_MOD + BRIG_OPCODE_DIV = 7, //.k=BASIC_OR_MOD + BRIG_OPCODE_FLOOR = 8, //.k=BASIC_OR_MOD + BRIG_OPCODE_FMA = 9, //.k=BASIC_OR_MOD + BRIG_OPCODE_FRACT = 10, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAD = 11, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAX = 12, //.k=BASIC_OR_MOD + BRIG_OPCODE_MIN = 13, //.k=BASIC_OR_MOD + BRIG_OPCODE_MUL = 14, //.k=BASIC_OR_MOD + BRIG_OPCODE_MULHI = 15, //.k=BASIC_OR_MOD + BRIG_OPCODE_NEG = 16, //.k=BASIC_OR_MOD + BRIG_OPCODE_REM = 17, + BRIG_OPCODE_RINT = 18, //.k=BASIC_OR_MOD + BRIG_OPCODE_SQRT = 19, //.k=BASIC_OR_MOD + BRIG_OPCODE_SUB = 20, //.k=BASIC_OR_MOD + BRIG_OPCODE_TRUNC = 21, //.k=BASIC_OR_MOD + BRIG_OPCODE_MAD24 = 22, + BRIG_OPCODE_MAD24HI = 23, + BRIG_OPCODE_MUL24 = 24, + BRIG_OPCODE_MUL24HI = 25, + BRIG_OPCODE_SHL = 26, + BRIG_OPCODE_SHR = 27, + BRIG_OPCODE_AND = 28, + BRIG_OPCODE_NOT = 29, + BRIG_OPCODE_OR = 30, + BRIG_OPCODE_POPCOUNT = 31, //.k=SOURCE_TYPE + BRIG_OPCODE_XOR = 32, + BRIG_OPCODE_BITEXTRACT = 33, + BRIG_OPCODE_BITINSERT = 34, + BRIG_OPCODE_BITMASK = 35, + BRIG_OPCODE_BITREV = 36, + BRIG_OPCODE_BITSELECT = 37, + BRIG_OPCODE_FIRSTBIT = 38, //.k=SOURCE_TYPE + BRIG_OPCODE_LASTBIT = 39, //.k=SOURCE_TYPE + BRIG_OPCODE_COMBINE = 40, //.k=SOURCE_TYPE //.vecOpndIndex=1 + BRIG_OPCODE_EXPAND = 41, //.k=SOURCE_TYPE //.vecOpndIndex=0 + BRIG_OPCODE_LDA = 42, //.k=ADDR + BRIG_OPCODE_MOV = 43, + BRIG_OPCODE_SHUFFLE = 44, + BRIG_OPCODE_UNPACKHI = 45, + BRIG_OPCODE_UNPACKLO = 46, + BRIG_OPCODE_PACK = 47, //.k=SOURCE_TYPE + BRIG_OPCODE_UNPACK = 48, //.k=SOURCE_TYPE + BRIG_OPCODE_CMOV = 49, + BRIG_OPCODE_CLASS = 50, //.k=SOURCE_TYPE + BRIG_OPCODE_NCOS = 51, + BRIG_OPCODE_NEXP2 = 52, + BRIG_OPCODE_NFMA = 53, + BRIG_OPCODE_NLOG2 = 54, + BRIG_OPCODE_NRCP = 55, + BRIG_OPCODE_NRSQRT = 56, + BRIG_OPCODE_NSIN = 57, + BRIG_OPCODE_NSQRT = 58, + BRIG_OPCODE_BITALIGN = 59, + BRIG_OPCODE_BYTEALIGN = 60, + BRIG_OPCODE_PACKCVT = 61, //.k=SOURCE_TYPE + BRIG_OPCODE_UNPACKCVT = 62, //.k=SOURCE_TYPE + BRIG_OPCODE_LERP = 63, + BRIG_OPCODE_SAD = 64, //.k=SOURCE_TYPE + BRIG_OPCODE_SADHI = 65, //.k=SOURCE_TYPE + BRIG_OPCODE_SEGMENTP = 66, //.k=SEG_CVT + BRIG_OPCODE_FTOS = 67, //.k=SEG_CVT + BRIG_OPCODE_STOF = 68, //.k=SEG_CVT + BRIG_OPCODE_CMP = 69, //.k=CMP + BRIG_OPCODE_CVT = 70, //.k=CVT + BRIG_OPCODE_LD = 71, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_ST = 72, //.k=MEM //.has_memory_order //.vecOpndIndex=0 //.numdst=0 + BRIG_OPCODE_ATOMIC = 73, //.k=ATOMIC + BRIG_OPCODE_ATOMICNORET = 74, //.k=ATOMIC //.numdst=0 + BRIG_OPCODE_SIGNAL = 75, //.k=SIGNAL + BRIG_OPCODE_SIGNALNORET = 76, //.k=SIGNAL //.numdst=0 + BRIG_OPCODE_MEMFENCE = 77, //.k=MEM_FENCE //.numdst=0 + BRIG_OPCODE_RDIMAGE = 78, //.k=IMAGE //.vecOpndIndex=0 + BRIG_OPCODE_LDIMAGE = 79, //.k=IMAGE //.vecOpndIndex=0 + BRIG_OPCODE_STIMAGE = 80, 
//.k=IMAGE //.vecOpndIndex=0 //.numdst=0 + BRIG_OPCODE_IMAGEFENCE = 81, //.k=BASIC_NO_TYPE + BRIG_OPCODE_QUERYIMAGE = 82, //.k=QUERY_IMAGE + BRIG_OPCODE_QUERYSAMPLER = 83, //.k=QUERY_SAMPLER + BRIG_OPCODE_CBR = 84, //.k=BR //.numdst=0 + BRIG_OPCODE_BR = 85, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_SBR = 86, //.k=BR //.numdst=0 //.psopnd=SbrOperands + BRIG_OPCODE_BARRIER = 87, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_WAVEBARRIER = 88, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_ARRIVEFBAR = 89, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_INITFBAR = 90, //.k=BASIC_NO_TYPE //.numdst=0 //.hasType=false + BRIG_OPCODE_JOINFBAR = 91, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_LEAVEFBAR = 92, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_RELEASEFBAR = 93, //.k=BASIC_NO_TYPE //.numdst=0 + BRIG_OPCODE_WAITFBAR = 94, //.k=BR //.numdst=0 //.hasType=false + BRIG_OPCODE_LDF = 95, + BRIG_OPCODE_ACTIVELANECOUNT = 96, //.k=LANE + BRIG_OPCODE_ACTIVELANEID = 97, //.k=LANE + BRIG_OPCODE_ACTIVELANEMASK = 98, //.k=LANE //.vecOpndIndex=0 + BRIG_OPCODE_ACTIVELANEPERMUTE = 99, //.k=LANE + BRIG_OPCODE_CALL = 100, //.k=BR //.psopnd=CallOperands //.numdst=0 //.hasType=false + BRIG_OPCODE_SCALL = 101, //.k=BR //.psopnd=CallOperands //.numdst=0 + BRIG_OPCODE_ICALL = 102, //.k=BR //.psopnd=CallOperands //.numdst=0 + BRIG_OPCODE_RET = 103, //.k=BASIC_NO_TYPE + BRIG_OPCODE_ALLOCA = 104, //.k=MEM + BRIG_OPCODE_CURRENTWORKGROUPSIZE = 105, + BRIG_OPCODE_CURRENTWORKITEMFLATID = 106, + BRIG_OPCODE_DIM = 107, + BRIG_OPCODE_GRIDGROUPS = 108, + BRIG_OPCODE_GRIDSIZE = 109, + BRIG_OPCODE_PACKETCOMPLETIONSIG = 110, + BRIG_OPCODE_PACKETID = 111, + BRIG_OPCODE_WORKGROUPID = 112, + BRIG_OPCODE_WORKGROUPSIZE = 113, + BRIG_OPCODE_WORKITEMABSID = 114, + BRIG_OPCODE_WORKITEMFLATABSID = 115, + BRIG_OPCODE_WORKITEMFLATID = 116, + BRIG_OPCODE_WORKITEMID = 117, + BRIG_OPCODE_CLEARDETECTEXCEPT = 118, //.numdst=0 + BRIG_OPCODE_GETDETECTEXCEPT = 119, + BRIG_OPCODE_SETDETECTEXCEPT = 120, //.numdst=0 + BRIG_OPCODE_ADDQUEUEWRITEINDEX = 121, //.k=QUEUE + BRIG_OPCODE_CASQUEUEWRITEINDEX = 122, //.k=QUEUE + BRIG_OPCODE_LDQUEUEREADINDEX = 123, //.k=QUEUE + BRIG_OPCODE_LDQUEUEWRITEINDEX = 124, //.k=QUEUE + BRIG_OPCODE_STQUEUEREADINDEX = 125, //.k=QUEUE //.numdst=0 + BRIG_OPCODE_STQUEUEWRITEINDEX = 126, //.k=QUEUE //.numdst=0 + BRIG_OPCODE_CLOCK = 127, + BRIG_OPCODE_CUID = 128, + BRIG_OPCODE_DEBUGTRAP = 129, //.numdst=0 + BRIG_OPCODE_GROUPBASEPTR = 130, + BRIG_OPCODE_KERNARGBASEPTR = 131, + BRIG_OPCODE_LANEID = 132, + BRIG_OPCODE_MAXCUID = 133, + BRIG_OPCODE_MAXWAVEID = 134, + BRIG_OPCODE_NULLPTR = 135, //.k=SEG + BRIG_OPCODE_WAVEID = 136, + BRIG_OPCODE_FIRST_USER_DEFINED = 32768, //.skip + + BRIG_OPCODE_GCNMADU = (1u << 15) | 0, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNMADS = (1u << 15) | 1, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNMAX3 = (1u << 15) | 2, + BRIG_OPCODE_GCNMIN3 = (1u << 15) | 3, + BRIG_OPCODE_GCNMED3 = (1u << 15) | 4, + BRIG_OPCODE_GCNFLDEXP = (1u << 15) | 5, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNFREXP_EXP = (1u << 15) | 6, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNFREXP_MANT = (1u << 15) | 7, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNTRIG_PREOP = (1u << 15) | 8, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNBFM = (1u << 15) | 9, + BRIG_OPCODE_GCNLD = (1u << 15) | 10, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_GCNST = (1u << 15) | 11, //.k=MEM //.has_memory_order //.vecOpndIndex=0 + BRIG_OPCODE_GCNATOMIC = (1u << 15) | 12, //.k=ATOMIC + BRIG_OPCODE_GCNATOMICNORET = (1u << 15) | 13, //.k=ATOMIC 
//.mnemo=gcn_atomicNoRet + BRIG_OPCODE_GCNSLEEP = (1u << 15) | 14, + BRIG_OPCODE_GCNPRIORITY = (1u << 15) | 15, + BRIG_OPCODE_GCNREGIONALLOC = (1u << 15) | 16, //.k=BASIC_NO_TYPE //.mnemo=gcn_region_alloc + BRIG_OPCODE_GCNMSAD = (1u << 15) | 17, + BRIG_OPCODE_GCNQSAD = (1u << 15) | 18, + BRIG_OPCODE_GCNMQSAD = (1u << 15) | 19, + BRIG_OPCODE_GCNMQSAD4 = (1u << 15) | 20, //.k=BASIC_NO_TYPE + BRIG_OPCODE_GCNSADW = (1u << 15) | 21, + BRIG_OPCODE_GCNSADD = (1u << 15) | 22, + BRIG_OPCODE_GCNCONSUME = (1u << 15) | 23, //.k=ADDR //.mnemo=gcn_atomic_consume + BRIG_OPCODE_GCNAPPEND = (1u << 15) | 24, //.k=ADDR //.mnemo=gcn_atomic_append + BRIG_OPCODE_GCNB4XCHG = (1u << 15) | 25, //.mnemo=gcn_b4xchg + BRIG_OPCODE_GCNB32XCHG = (1u << 15) | 26, //.mnemo=gcn_b32xchg + BRIG_OPCODE_GCNMAX = (1u << 15) | 27, + BRIG_OPCODE_GCNMIN = (1u << 15) | 28, + BRIG_OPCODE_GCNDIVRELAXED = (1u << 15) | 29, //.k=BASIC_OR_MOD + BRIG_OPCODE_GCNDIVRELAXEDNARROW = (1u << 15) | 30, +}; + +enum BrigPack { + + //.tdcaption="Packing" + // + //.mnemo={ s/^BRIG_PACK_//;s/SAT$/_sat/;lc } + //.mnemo_token=_EMPacking + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_PACK_NONE = 0, //.mnemo="" + BRIG_PACK_PP = 1, + BRIG_PACK_PS = 2, + BRIG_PACK_SP = 3, + BRIG_PACK_SS = 4, + BRIG_PACK_S = 5, + BRIG_PACK_P = 6, + BRIG_PACK_PPSAT = 7, + BRIG_PACK_PSSAT = 8, + BRIG_PACK_SPSAT = 9, + BRIG_PACK_SSSAT = 10, + BRIG_PACK_SSAT = 11, + BRIG_PACK_PSAT = 12 +}; + +enum BrigProfile { + + //.mnemo={ s/^BRIG_PROFILE_//;'$'.lc } + //.mnemo_token=ETargetProfile + // + //.print=$mnemo{ $mnemo } + + BRIG_PROFILE_BASE = 0, + BRIG_PROFILE_FULL = 1, + + BRIG_PROFILE_UNDEF = 2 //.skip +}; + +enum BrigRegisterKind { + + //.mnemo={ s/^BRIG_REGISTER_KIND_//;'$'.lc(substr($_,0,1)) } + // + //.bits={ } + //.bits_switch //.bits_proto="unsigned getRegBits(BrigRegisterKind16_t arg)" //.bits_default="return (unsigned)-1" + // + //.nollvm + + BRIG_REGISTER_KIND_CONTROL = 0, //.bits=1 + BRIG_REGISTER_KIND_SINGLE = 1, //.bits=32 + BRIG_REGISTER_KIND_DOUBLE = 2, //.bits=64 + BRIG_REGISTER_KIND_QUAD = 3 //.bits=128 +}; + +enum BrigRound { + + //.mnemo={} + //.mnemo_fn=round2str //.mnemo_token=_EMRound + // + //.sat={/_SAT$/? "true" : "false"} + //.sat_switch //.sat_proto="bool isSatRounding(unsigned rounding)" //.sat_arg="rounding" + //.sat_default="return false" + // + //.sig={/_SIGNALING_/? "true" : "false"} + //.sig_switch //.sig_proto="bool isSignalingRounding(unsigned rounding)" //.sig_arg="rounding" + //.sig_default="return false" + // + //.int={/_INTEGER_/? "true" : "false"} + //.int_switch //.int_proto="bool isIntRounding(unsigned rounding)" //.int_arg="rounding" + //.int_default="return false" + // + //.flt={/_FLOAT_/? 
"true" : "false"} + //.flt_switch //.flt_proto="bool isFloatRounding(unsigned rounding)" //.flt_arg="rounding" + //.flt_default="return false" + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_ROUND_NONE = 0, //.no_mnemo + BRIG_ROUND_FLOAT_DEFAULT = 1, //.no_mnemo + BRIG_ROUND_FLOAT_NEAR_EVEN = 2, //.mnemo=near + BRIG_ROUND_FLOAT_ZERO = 3, //.mnemo=zero + BRIG_ROUND_FLOAT_PLUS_INFINITY = 4, //.mnemo=up + BRIG_ROUND_FLOAT_MINUS_INFINITY = 5, //.mnemo=down + BRIG_ROUND_INTEGER_NEAR_EVEN = 6, //.mnemo=neari + BRIG_ROUND_INTEGER_ZERO = 7, //.mnemo=zeroi + BRIG_ROUND_INTEGER_PLUS_INFINITY = 8, //.mnemo=upi + BRIG_ROUND_INTEGER_MINUS_INFINITY = 9, //.mnemo=downi + BRIG_ROUND_INTEGER_NEAR_EVEN_SAT = 10, //.mnemo=neari_sat + BRIG_ROUND_INTEGER_ZERO_SAT = 11, //.mnemo=zeroi_sat + BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT = 12, //.mnemo=upi_sat + BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT = 13, //.mnemo=downi_sat + BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN = 14, //.mnemo=sneari + BRIG_ROUND_INTEGER_SIGNALING_ZERO = 15, //.mnemo=szeroi + BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY = 16, //.mnemo=supi + BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY = 17, //.mnemo=sdowni + BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT = 18, //.mnemo=sneari_sat + BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT = 19, //.mnemo=szeroi_sat + BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20, //.mnemo=supi_sat + BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21 //.mnemo=sdowni_sat +}; + +enum BrigSamplerAddressing { + + //.mnemo={ s/^BRIG_ADDRESSING_//;lc } + //.mnemo_token=ESamplerAddressingMode + + BRIG_ADDRESSING_UNDEFINED = 0, + BRIG_ADDRESSING_CLAMP_TO_EDGE = 1, + BRIG_ADDRESSING_CLAMP_TO_BORDER = 2, + BRIG_ADDRESSING_REPEAT = 3, + BRIG_ADDRESSING_MIRRORED_REPEAT = 4, + + BRIG_ADDRESSING_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigSamplerCoordNormalization { + + //.mnemo={ s/^BRIG_COORD_//;lc } + //.mnemo_token=ESamplerCoord + // + //.print=$mnemo{ $mnemo } + + BRIG_COORD_UNNORMALIZED = 0, + BRIG_COORD_NORMALIZED = 1 +}; + +enum BrigSamplerFilter { + + //.mnemo={ s/^BRIG_FILTER_//;lc } + // + //.print=$mnemo{ $mnemo } + + BRIG_FILTER_NEAREST = 0, + BRIG_FILTER_LINEAR = 1, + + BRIG_FILTER_FIRST_USER_DEFINED = 128 //.skip +}; + +enum BrigSamplerQuery { + + //.mnemo={ s/^BRIG_SAMPLER_QUERY_//;lc } + //.mnemo_token=_EMSamplerQuery + // + //.print=$mnemo{ $mnemo } + + BRIG_SAMPLER_QUERY_ADDRESSING = 0, + BRIG_SAMPLER_QUERY_COORD = 1, + BRIG_SAMPLER_QUERY_FILTER = 2 +}; + +enum BrigSectionIndex { + + //.nollvm + // + //.mnemo={ s/^BRIG_SECTION_INDEX_/HSA_/;lc } + + BRIG_SECTION_INDEX_DATA = 0, + BRIG_SECTION_INDEX_CODE = 1, + BRIG_SECTION_INDEX_OPERAND = 2, + BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED = 3, + + // used internally + BRIG_SECTION_INDEX_IMPLEMENTATION_DEFINED = BRIG_SECTION_INDEX_BEGIN_IMPLEMENTATION_DEFINED //.skip +}; + +enum BrigSegCvtModifierMask { + BRIG_SEG_CVT_NONULL = 1 //.mnemo="nonull" //.print="_nonull" +}; + +enum BrigSegment { + + //.mnemo={ s/^BRIG_SEGMENT_//;lc} + //.mnemo_token=_EMSegment + //.mnemo_context=EInstModifierContext + // + //.print=$mnemo{ $mnemo ? 
"_$mnemo" : "" } + + BRIG_SEGMENT_NONE = 0, //.mnemo="" + BRIG_SEGMENT_FLAT = 1, //.mnemo="" + BRIG_SEGMENT_GLOBAL = 2, + BRIG_SEGMENT_READONLY = 3, + BRIG_SEGMENT_KERNARG = 4, + BRIG_SEGMENT_GROUP = 5, + BRIG_SEGMENT_PRIVATE = 6, + BRIG_SEGMENT_SPILL = 7, + BRIG_SEGMENT_ARG = 8, + + BRIG_SEGMENT_FIRST_USER_DEFINED = 128, //.skip + + BRIG_SEGMENT_AMD_GCN = 9, //.mnemo="region" +}; + +enum BrigPackedTypeBits { + + //.nodump + // + //.nollvm + + BRIG_TYPE_BASE_SIZE = 5, + BRIG_TYPE_PACK_SIZE = 2, + BRIG_TYPE_ARRAY_SIZE = 1, + + BRIG_TYPE_BASE_SHIFT = 0, + BRIG_TYPE_PACK_SHIFT = BRIG_TYPE_BASE_SHIFT + BRIG_TYPE_BASE_SIZE, + BRIG_TYPE_ARRAY_SHIFT = BRIG_TYPE_PACK_SHIFT + BRIG_TYPE_PACK_SIZE, + + BRIG_TYPE_BASE_MASK = ((1 << BRIG_TYPE_BASE_SIZE) - 1) << BRIG_TYPE_BASE_SHIFT, + BRIG_TYPE_PACK_MASK = ((1 << BRIG_TYPE_PACK_SIZE) - 1) << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_ARRAY_MASK = ((1 << BRIG_TYPE_ARRAY_SIZE) - 1) << BRIG_TYPE_ARRAY_SHIFT, + + BRIG_TYPE_PACK_NONE = 0 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_32 = 1 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_64 = 2 << BRIG_TYPE_PACK_SHIFT, + BRIG_TYPE_PACK_128 = 3 << BRIG_TYPE_PACK_SHIFT, + + BRIG_TYPE_ARRAY = 1 << BRIG_TYPE_ARRAY_SHIFT +}; + +enum BrigType { + + //.numBits={ /ARRAY$/ ? undef : /([0-9]+)X([0-9]+)/ ? $1*$2 : /([0-9]+)/ ? $1 : undef } + //.numBits_switch //.numBits_proto="unsigned getBrigTypeNumBits(unsigned arg)" //.numBits_default="assert(0); return 0" + //.numBytes=$numBits{ $numBits > 1 ? $numBits/8 : undef } + //.numBytes_switch //.numBytes_proto="unsigned getBrigTypeNumBytes(unsigned arg)" //.numBytes_default="assert(0); return 0" + // + //.mnemo={ s/^BRIG_TYPE_//;lc } + //.mnemo_token=_EMType + // + //.array={/ARRAY$/?"true":"false"} + //.array_switch //.array_proto="bool isArrayType(unsigned type)" //.array_arg="type" + //.array_default="return false" + // + //.a2e={/(.*)_ARRAY$/? $1 : "BRIG_TYPE_NONE"} + //.a2e_switch //.a2e_proto="unsigned arrayType2elementType(unsigned type)" //.a2e_arg="type" + //.a2e_default="return BRIG_TYPE_NONE" + // + //.e2a={/_ARRAY$/? "BRIG_TYPE_NONE" : /_NONE$/ ? "BRIG_TYPE_NONE" : /_B1$/ ? "BRIG_TYPE_NONE" : $_ . "_ARRAY"} + //.e2a_switch //.e2a_proto="unsigned elementType2arrayType(unsigned type)" //.e2a_arg="type" + //.e2a_default="return BRIG_TYPE_NONE" + // + //.t2s={s/^BRIG_TYPE_//;lc s/_ARRAY$/[]/;lc} + //.t2s_switch //.t2s_proto="const char* type2name(unsigned type)" //.t2s_arg="type" + //.t2s_default="return NULL" + // + //.dispatch_switch //.dispatch_incfile=TemplateUtilities + //.dispatch_proto="template\nRetType dispatchByType_gen(unsigned type, Visitor& v)" + //.dispatch={ /ARRAY$/ ? "v.visitNone(type)" : /^BRIG_TYPE_([BUSF]|SIG)[0-9]+/ ? 
"v.template visit< BrigTypeTraits<$_> >()" : "v.visitNone(type)" } + //.dispatch_arg="type" //.dispatch_default="return v.visitNone(type)" + // + //- .tdname=BrigType + // + //.print=$mnemo{ "_$mnemo" } + + BRIG_TYPE_NONE = 0, //.mnemo="" //.print="" + BRIG_TYPE_U8 = 1, //.ctype=uint8_t + BRIG_TYPE_U16 = 2, //.ctype=uint16_t + BRIG_TYPE_U32 = 3, //.ctype=uint32_t + BRIG_TYPE_U64 = 4, //.ctype=uint64_t + BRIG_TYPE_S8 = 5, //.ctype=int8_t + BRIG_TYPE_S16 = 6, //.ctype=int16_t + BRIG_TYPE_S32 = 7, //.ctype=int32_t + BRIG_TYPE_S64 = 8, //.ctype=int64_t + BRIG_TYPE_F16 = 9, //.ctype=f16_t + BRIG_TYPE_F32 = 10, //.ctype=float + BRIG_TYPE_F64 = 11, //.ctype=double + BRIG_TYPE_B1 = 12, //.ctype=bool //.numBytes=1 + BRIG_TYPE_B8 = 13, //.ctype=uint8_t + BRIG_TYPE_B16 = 14, //.ctype=uint16_t + BRIG_TYPE_B32 = 15, //.ctype=uint32_t + BRIG_TYPE_B64 = 16, //.ctype=uint64_t + BRIG_TYPE_B128 = 17, //.ctype=b128_t + BRIG_TYPE_SAMP = 18, //.mnemo=samp //.numBits=64 + BRIG_TYPE_ROIMG = 19, //.mnemo=roimg //.numBits=64 + BRIG_TYPE_WOIMG = 20, //.mnemo=woimg //.numBits=64 + BRIG_TYPE_RWIMG = 21, //.mnemo=rwimg //.numBits=64 + BRIG_TYPE_SIG32 = 22, //.mnemo=sig32 //.numBits=64 + BRIG_TYPE_SIG64 = 23, //.mnemo=sig64 //.numBits=64 + + BRIG_TYPE_U8X4 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_32, //.ctype=uint8_t + BRIG_TYPE_U8X8 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_64, //.ctype=uint8_t + BRIG_TYPE_U8X16 = BRIG_TYPE_U8 | BRIG_TYPE_PACK_128, //.ctype=uint8_t + BRIG_TYPE_U16X2 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_32, //.ctype=uint16_t + BRIG_TYPE_U16X4 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_64, //.ctype=uint16_t + BRIG_TYPE_U16X8 = BRIG_TYPE_U16 | BRIG_TYPE_PACK_128, //.ctype=uint16_t + BRIG_TYPE_U32X2 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_64, //.ctype=uint32_t + BRIG_TYPE_U32X4 = BRIG_TYPE_U32 | BRIG_TYPE_PACK_128, //.ctype=uint32_t + BRIG_TYPE_U64X2 = BRIG_TYPE_U64 | BRIG_TYPE_PACK_128, //.ctype=uint64_t + BRIG_TYPE_S8X4 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_32, //.ctype=int8_t + BRIG_TYPE_S8X8 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_64, //.ctype=int8_t + BRIG_TYPE_S8X16 = BRIG_TYPE_S8 | BRIG_TYPE_PACK_128, //.ctype=int8_t + BRIG_TYPE_S16X2 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_32, //.ctype=int16_t + BRIG_TYPE_S16X4 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_64, //.ctype=int16_t + BRIG_TYPE_S16X8 = BRIG_TYPE_S16 | BRIG_TYPE_PACK_128, //.ctype=int16_t + BRIG_TYPE_S32X2 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_64, //.ctype=int32_t + BRIG_TYPE_S32X4 = BRIG_TYPE_S32 | BRIG_TYPE_PACK_128, //.ctype=int32_t + BRIG_TYPE_S64X2 = BRIG_TYPE_S64 | BRIG_TYPE_PACK_128, //.ctype=int64_t + BRIG_TYPE_F16X2 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_32, //.ctype=f16_t + BRIG_TYPE_F16X4 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_64, //.ctype=f16_t + BRIG_TYPE_F16X8 = BRIG_TYPE_F16 | BRIG_TYPE_PACK_128, //.ctype=f16_t + BRIG_TYPE_F32X2 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_64, //.ctype=float + BRIG_TYPE_F32X4 = BRIG_TYPE_F32 | BRIG_TYPE_PACK_128, //.ctype=float + BRIG_TYPE_F64X2 = BRIG_TYPE_F64 | BRIG_TYPE_PACK_128, //.ctype=double + + BRIG_TYPE_U8_ARRAY = BRIG_TYPE_U8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16_ARRAY = BRIG_TYPE_U16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32_ARRAY = BRIG_TYPE_U32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U64_ARRAY = BRIG_TYPE_U64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8_ARRAY = BRIG_TYPE_S8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16_ARRAY = BRIG_TYPE_S16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32_ARRAY = BRIG_TYPE_S32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S64_ARRAY = BRIG_TYPE_S64 
| BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16_ARRAY = BRIG_TYPE_F16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32_ARRAY = BRIG_TYPE_F32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F64_ARRAY = BRIG_TYPE_F64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B8_ARRAY = BRIG_TYPE_B8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B16_ARRAY = BRIG_TYPE_B16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B32_ARRAY = BRIG_TYPE_B32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B64_ARRAY = BRIG_TYPE_B64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_B128_ARRAY = BRIG_TYPE_B128 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SAMP_ARRAY = BRIG_TYPE_SAMP | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_ROIMG_ARRAY = BRIG_TYPE_ROIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_WOIMG_ARRAY = BRIG_TYPE_WOIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_RWIMG_ARRAY = BRIG_TYPE_RWIMG | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SIG32_ARRAY = BRIG_TYPE_SIG32 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_SIG64_ARRAY = BRIG_TYPE_SIG64 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X4_ARRAY = BRIG_TYPE_U8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X8_ARRAY = BRIG_TYPE_U8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U8X16_ARRAY = BRIG_TYPE_U8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X2_ARRAY = BRIG_TYPE_U16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X4_ARRAY = BRIG_TYPE_U16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U16X8_ARRAY = BRIG_TYPE_U16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32X2_ARRAY = BRIG_TYPE_U32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U32X4_ARRAY = BRIG_TYPE_U32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_U64X2_ARRAY = BRIG_TYPE_U64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X4_ARRAY = BRIG_TYPE_S8X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X8_ARRAY = BRIG_TYPE_S8X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S8X16_ARRAY = BRIG_TYPE_S8X16 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X2_ARRAY = BRIG_TYPE_S16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X4_ARRAY = BRIG_TYPE_S16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S16X8_ARRAY = BRIG_TYPE_S16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32X2_ARRAY = BRIG_TYPE_S32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S32X4_ARRAY = BRIG_TYPE_S32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_S64X2_ARRAY = BRIG_TYPE_S64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X2_ARRAY = BRIG_TYPE_F16X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X4_ARRAY = BRIG_TYPE_F16X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F16X8_ARRAY = BRIG_TYPE_F16X8 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32X2_ARRAY = BRIG_TYPE_F32X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F32X4_ARRAY = BRIG_TYPE_F32X4 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + BRIG_TYPE_F64X2_ARRAY = BRIG_TYPE_F64X2 | BRIG_TYPE_ARRAY, //.mnemo="" //.print="" + + // Used internally + BRIG_TYPE_INVALID = (unsigned) -1 //.skip +}; + +enum BrigVariableModifierMask { + + //.nodump + + BRIG_VARIABLE_DEFINITION = 1, + BRIG_VARIABLE_CONST = 2 +}; + +enum BrigWidth { + + //.tddef=1 + // + //.print={ s/^BRIG_WIDTH_//; "_width($_)" } + + BRIG_WIDTH_NONE = 0, + BRIG_WIDTH_1 = 1, + 
BRIG_WIDTH_2 = 2, + BRIG_WIDTH_4 = 3, + BRIG_WIDTH_8 = 4, + BRIG_WIDTH_16 = 5, + BRIG_WIDTH_32 = 6, + BRIG_WIDTH_64 = 7, + BRIG_WIDTH_128 = 8, + BRIG_WIDTH_256 = 9, + BRIG_WIDTH_512 = 10, + BRIG_WIDTH_1024 = 11, + BRIG_WIDTH_2048 = 12, + BRIG_WIDTH_4096 = 13, + BRIG_WIDTH_8192 = 14, + BRIG_WIDTH_16384 = 15, + BRIG_WIDTH_32768 = 16, + BRIG_WIDTH_65536 = 17, + BRIG_WIDTH_131072 = 18, + BRIG_WIDTH_262144 = 19, + BRIG_WIDTH_524288 = 20, + BRIG_WIDTH_1048576 = 21, + BRIG_WIDTH_2097152 = 22, + BRIG_WIDTH_4194304 = 23, + BRIG_WIDTH_8388608 = 24, + BRIG_WIDTH_16777216 = 25, + BRIG_WIDTH_33554432 = 26, + BRIG_WIDTH_67108864 = 27, + BRIG_WIDTH_134217728 = 28, + BRIG_WIDTH_268435456 = 29, + BRIG_WIDTH_536870912 = 30, + BRIG_WIDTH_1073741824 = 31, + BRIG_WIDTH_2147483648 = 32, + BRIG_WIDTH_WAVESIZE = 33, + BRIG_WIDTH_ALL = 34, + + BRIG_WIDTH_LAST //.skip +}; + +enum BrigExceptionsMask { + BRIG_EXCEPTIONS_INVALID_OPERATION = 1 << 0, + BRIG_EXCEPTIONS_DIVIDE_BY_ZERO = 1 << 1, + BRIG_EXCEPTIONS_OVERFLOW = 1 << 2, + BRIG_EXCEPTIONS_UNDERFLOW = 1 << 3, + BRIG_EXCEPTIONS_INEXACT = 1 << 4, + + BRIG_EXCEPTIONS_FIRST_USER_DEFINED = 1 << 16 +}; +#endif + +#endif Index: lib/Target/HSAIL/HSAILComparisons.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILComparisons.td @@ -0,0 +1,9 @@ + +defm CMP : InstCmp_RetTypes<"cmp", BrigOpcode.CMP>; + +//////////////////////////////////////////////////////////////////////////////// +// floating-point classify instructions + +defm CLASS : InstSourceType_2Op_Class_Types<"class", BrigOpcode.CLASS>; +def : InstSourceType_Class_Pat; +def : InstSourceType_Class_Pat; Index: lib/Target/HSAIL/HSAILControlFlow.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILControlFlow.td @@ -0,0 +1,77 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// + +let isNotDuplicable = 1, isCall = 1 in { + // No semicolon because we need to specially handle the printing of + // the variable_ops. 
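One encoding detail worth keeping in mind for the width operands used by the branch definitions further below: BrigWidth8_t does not hold the width itself. BRIG_WIDTH_1 through BRIG_WIDTH_2147483648 store log2(width) + 1, with 0 reserved for BRIG_WIDTH_NONE and the WAVESIZE/ALL markers continuing past the numeric range. A small illustration of the mapping (not code from the patch):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

// Illustration only: map a power-of-two execution width to its BrigWidth
// encoding. BRIG_WIDTH_1 == 1, BRIG_WIDTH_2 == 2, ...,
// BRIG_WIDTH_2147483648 == 32, i.e. the encoded value is log2(width) + 1.
static unsigned widthToBrigWidth(uint64_t Width) {
  assert(llvm::isPowerOf2_64(Width) && Width <= (1ull << 31) &&
         "only power-of-two widths up to 2^31 have an encoding");
  return llvm::Log2_64(Width) + 1; // e.g. widthToBrigWidth(64) == 7 == BRIG_WIDTH_64
}

The BR and CBR selection patterns below then pass these encodings symbolically, as BrigWidth._ALL and BrigWidth._1.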
+ def CALL : HSAILInst<(outs), + (ins calltarget:$dest, variable_ops), + "call\t$dest ", [], + 0, + 0 + >; +} + +//////////////////////////////////////////////////////////////////////////////// + +let isNotDuplicable = 1, hasSideEffects = 1 in { + def ARG_SCOPE_START : HSAILInst< + (outs), + (ins i32imm:$src0), + "\\{", + [(IL_callseq_start timm:$src0)], + 0, + 0 + >; + + def ARG_SCOPE_END : HSAILInst< + (outs), + (ins i32imm:$src0, i32imm:$src1), + "\\}", + [(IL_callseq_end timm:$src0, timm:$src1)], + 0, + 0 + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// ret + +let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1, + isNotDuplicable = 1, hasSideEffects = 1 in { + def RET : HSAILInstBasic_0Op_NoRet<"ret", BrigOpcode.RET, Inst_Void>; +} + +def : InstBasic_0Op_NoRet_Pat; + +//////////////////////////////////////////////////////////////////////////////// +// branch (unconditional and conditional) + + +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + let isBarrier = 1, WidthAttr = WidthAttrValues.ALL in { + def BR : HSAILInstBr_1Op_NoRet<"br", BrigOpcode.BR>; + } + + def CBR : HSAILInstBr_2Op_NoRet<"cbr", BrigOpcode.CBR>; + + // TODO: can sbr be used to support brind? +} + +def : Pat< + (br bb:$src0), + (BR BrigWidth._ALL, bb:$src0, BrigType.NONE) +>; + +def : Pat< + (brcond i1:$src0, bb:$src1), + (CBR BrigWidth._1, $src0, bb:$src1, BrigType.B1) +>; Index: lib/Target/HSAIL/HSAILConversions.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILConversions.td @@ -0,0 +1,343 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// conversion routines + + +class CvtPat : Pat< + (destTy.VT (node srcTy.VT:$src)), + (!cast("CVT"#destTy.InstName#srcTy.InstName) ftz, roundmode, destTy.BT, srcTy.BT, $src) +>; + +// cvt changes behavior for some reason when the dest type is b1, and +// does != 0 instead of truncate, so convert to i1 range values before +// doing conversion. 
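The comment directly above and the Trunc_B1_Pat class directly below deal with a subtle semantic mismatch between LLVM's trunc-to-i1 and an HSAIL cvt whose destination type is b1. The throwaway functions here spell it out (illustration only, not code from the patch):

#include <cstdint>

// LLVM semantics: trunc i32 -> i1 keeps only bit 0 of the source.
static bool truncToI1(uint32_t X) { return (X & 1) != 0; }

// HSAIL semantics: cvt with a b1 destination behaves like a "!= 0" test.
static bool cvtToB1(uint32_t X) { return X != 0; }

// The two disagree whenever X is even and nonzero, e.g. X == 2:
// truncToI1(2) == false, but cvtToB1(2) == true.

Masking the source with 1 before the conversion makes the two agree for every input, which is exactly what the (andInst $src, 1, andTy.BT) operand in the Trunc_B1_Pat pattern below arranges.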
+class Trunc_B1_Pat : Pat< + (i1 (trunc srcTy.VT:$src)), + (!cast("CVT_B1"#srcTy.InstName) 0, BrigRound.NONE, BrigType.B1, srcTy.BT, (andInst $src, 1, andTy.BT)) +>; + +class CvtSInt32RoundPat : Pat< + (i32 (fp_to_sint (roundop srcTy.VT:$src))), + (!cast("CVT_S32"#srcTy.InstName) ftz, roundmode, BrigType.S32, srcTy.BT, $src) +>; + +let isConv = 1 in { + defm RINT : InstMod_1Op_FPTypes<"rint", BrigOpcode.RINT>; + defm FLOOR : InstMod_1Op_FPTypes<"floor", BrigOpcode.FLOOR>; + defm CEIL : InstMod_1Op_FPTypes<"ceil", BrigOpcode.CEIL>; + defm TRUNC : InstMod_1Op_FPTypes<"trunc", BrigOpcode.TRUNC>; +} + + +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; +//def : CvtPat; // FIXME +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; +//def : CvtPat; // FIXME +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : Trunc_B1_Pat; +def : Trunc_B1_Pat; +def : CvtPat; + +// ftz conversion complies our OpenCL contract wrt f64 denorms, +// because f32->f64 would not yield f64 denorms, so whether f64 is +// flushed or not does not really matter. +def : CvtPat; +def : CvtPat; + +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; + +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; +def : CvtSInt32RoundPat; + + +//////////////////////////////////////////////////////////////////////////////// +// support for explicit conversions + +// float to int +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// float to uint +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + + +// float to long +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// float to ulong +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + + + +// double to int +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// double to uint +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + + +// double to long +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// double to ulong +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// half to float +def : CvtPat; + +// float to half +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// double to half +def : CvtPat; +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// int to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// uint to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// long to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// ulong to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// long to double +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// ulong to double +def : CvtPat; +def : CvtPat; +def : CvtPat; + +// double to float +def : CvtPat; +def : CvtPat; +def : CvtPat; + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; + +def : InstMod_1Op_Pat; +def : InstMod_1Op_Pat; Index: lib/Target/HSAIL/HSAILELFTargetObjectFile.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILELFTargetObjectFile.h @@ -0,0 +1,57 @@ +//===-- HSAILELFObjectFile.h - HSAIL ELF Object Info ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILELFTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_HSAIL_HSAILELFTARGETOBJECTFILE_H + +#include "HSAILSection.h" + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" + +namespace llvm { + +class HSAILTargetObjectFile : public TargetLoweringObjectFileELF { +public: + HSAILTargetObjectFile(); + void Initialize(MCContext &ctx, const TargetMachine &TM) override; + + MCSection *getSectionForConstant(SectionKind Kind, + const Constant *C) const override { + return ReadOnlySection; + } + + MCSection *getExplicitSectionGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const override { + return DataSection; + } + + MCSection *SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const override { + return getDataSection(); + } +}; + +// FIXME: Do we really need both of these? +class BRIG32_DwarfTargetObjectFile : public TargetLoweringObjectFileELF { +public: + BRIG32_DwarfTargetObjectFile(){}; + virtual ~BRIG32_DwarfTargetObjectFile(); +}; + +class BRIG64_DwarfTargetObjectFile : public TargetLoweringObjectFileELF { +public: + BRIG64_DwarfTargetObjectFile(){}; + virtual ~BRIG64_DwarfTargetObjectFile(); +}; + +} // end namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILELFTargetObjectFile.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILELFTargetObjectFile.cpp @@ -0,0 +1,84 @@ +//===-- HSAILELFTargetObjectFile.cpp ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HSAILELFTargetObjectFile.h" + +using namespace llvm; + + +HSAILTargetObjectFile::HSAILTargetObjectFile() { + TextSection = nullptr; + DataSection = nullptr; + BSSSection = nullptr; + ReadOnlySection = nullptr; + + StaticCtorSection = nullptr; + StaticDtorSection = nullptr; + LSDASection = nullptr; + EHFrameSection = nullptr; + DwarfAbbrevSection = nullptr; + DwarfInfoSection = nullptr; + DwarfLineSection = nullptr; + DwarfFrameSection = nullptr; + DwarfPubTypesSection = nullptr; + DwarfDebugInlineSection = nullptr; + DwarfStrSection = nullptr; + DwarfLocSection = nullptr; + DwarfARangesSection = nullptr; + DwarfRangesSection = nullptr; +} + +void HSAILTargetObjectFile::Initialize(MCContext &ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(ctx, TM); + + TextSection = new HSAILSection(MCSection::SV_ELF, SectionKind::getText()); + DataSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getDataRel()); + BSSSection = new HSAILSection(MCSection::SV_ELF, SectionKind::getBSS()); + ReadOnlySection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getReadOnly()); + + StaticCtorSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + StaticDtorSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + LSDASection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + EHFrameSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfAbbrevSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfInfoSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLineSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfFrameSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfPubTypesSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfDebugInlineSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfStrSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfLocSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfARangesSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); + DwarfRangesSection = + new HSAILSection(MCSection::SV_ELF, SectionKind::getMetadata()); +} + +BRIG32_DwarfTargetObjectFile::~BRIG32_DwarfTargetObjectFile() { + +} + +BRIG64_DwarfTargetObjectFile::~BRIG64_DwarfTargetObjectFile() { + +} Index: lib/Target/HSAIL/HSAILEnums.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILEnums.td @@ -0,0 +1,662 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +let OperandType = "OPERAND_IMMEDIATE" in { + +def BrigAlignment : Operand { + let PrintMethod = "printBrigAlignment"; + int _NONE = 0; + int _1 = 1; + int _2 = 2; + int _4 = 3; + int _8 = 4; + int _16 = 5; + int _32 = 6; + int _64 = 7; + int _128 = 8; + int _256 = 9; + int _LAST = 10; + int _MAX = 9; +} + +def BrigAllocation : Operand { + let PrintMethod = "printBrigAllocation"; + int NONE = 0; + int PROGRAM = 1; + int AGENT = 2; + int AUTOMATIC = 3; +} + +def BrigAluModifierMask : Operand { + let PrintMethod = "printBrigAluModifierMask"; + int FTZ = 1; +} + +def BrigAtomicOperation : Operand { + let PrintMethod = "printBrigAtomicOperation"; + int ADD = 0; + int AND = 1; + int CAS = 2; + int EXCH = 3; + int LD = 4; + int MAX = 5; + int MIN = 6; + int OR = 7; + int ST = 8; + int SUB = 9; + int WRAPDEC = 10; + int WRAPINC = 11; + int XOR = 12; + int WAIT_EQ = 13; + int WAIT_NE = 14; + int WAIT_LT = 15; + int WAIT_GTE = 16; + int WAITTIMEOUT_EQ = 17; + int WAITTIMEOUT_NE = 18; + int WAITTIMEOUT_LT = 19; + int WAITTIMEOUT_GTE = 20; +} + +def BrigCompareOperation : Operand { + let PrintMethod = "printBrigCompareOperation"; + int EQ = 0; + int NE = 1; + int LT = 2; + int LE = 3; + int GT = 4; + int GE = 5; + int EQU = 6; + int NEU = 7; + int LTU = 8; + int LEU = 9; + int GTU = 10; + int GEU = 11; + int NUM = 12; + int NAN = 13; + int SEQ = 14; + int SNE = 15; + int SLT = 16; + int SLE = 17; + int SGT = 18; + int SGE = 19; + int SGEU = 20; + int SEQU = 21; + int SNEU = 22; + int SLTU = 23; + int SLEU = 24; + int SNUM = 25; + int SNAN = 26; + int SGTU = 27; +} + +def BrigControlDirective : Operand { + let PrintMethod = "printBrigControlDirective"; + int NONE = 0; + int ENABLEBREAKEXCEPTIONS = 1; + int ENABLEDETECTEXCEPTIONS = 2; + int MAXDYNAMICGROUPSIZE = 3; + int MAXFLATGRIDSIZE = 4; + int MAXFLATWORKGROUPSIZE = 5; + int REQUIREDDIM = 6; + int REQUIREDGRIDSIZE = 7; + int REQUIREDWORKGROUPSIZE = 8; + int REQUIRENOPARTIALWORKGROUPS = 9; +} + +def BrigExecutableModifierMask : Operand { + let PrintMethod = "printBrigExecutableModifierMask"; + int DEFINITION = 1; +} + +def BrigImageChannelOrder : Operand { + let PrintMethod = "printBrigImageChannelOrder"; + int A = 0; + int R = 1; + int RX = 2; + int RG = 3; + int RGX = 4; + int RA = 5; + int RGB = 6; + int RGBX = 7; + int RGBA = 8; + int BGRA = 9; + int ARGB = 10; + int ABGR = 11; + int SRGB = 12; + int SRGBX = 13; + int SRGBA = 14; + int SBGRA = 15; + int INTENSITY = 16; + int LUMINANCE = 17; + int DEPTH = 18; + int DEPTH_STENCIL = 19; + int UNKNOWN = 20; + int FIRST_USER_DEFINED = 128; +} + +def BrigImageChannelType : Operand { + let PrintMethod = "printBrigImageChannelType"; + int SNORM_INT8 = 0; + int SNORM_INT16 = 1; + int UNORM_INT8 = 2; + int UNORM_INT16 = 3; + int UNORM_INT24 = 4; + int UNORM_SHORT_555 = 5; + int UNORM_SHORT_565 = 6; + int UNORM_INT_101010 = 7; + int SIGNED_INT8 = 8; + int SIGNED_INT16 = 9; + int SIGNED_INT32 = 10; + int UNSIGNED_INT8 = 11; + int UNSIGNED_INT16 = 12; + int UNSIGNED_INT32 = 13; + int HALF_FLOAT = 14; + int FLOAT = 15; + int UNKNOWN = 16; + int FIRST_USER_DEFINED = 128; +} + +def BrigImageGeometry : Operand { + let PrintMethod = "printBrigImageGeometry"; + int _1D = 0; + int _2D = 1; + int _3D = 2; + int _1DA = 3; + int _2DA = 4; + int _1DB = 5; + int _2DDEPTH = 6; + int _2DADEPTH = 7; + int _UNKNOWN = 8; + int _FIRST_USER_DEFINED = 128; +} + +def BrigImageQuery : Operand { + let PrintMethod = "printBrigImageQuery"; + int 
WIDTH = 0; + int HEIGHT = 1; + int DEPTH = 2; + int ARRAY = 3; + int CHANNELORDER = 4; + int CHANNELTYPE = 5; +} + +def BrigLinkage : Operand { + let PrintMethod = "printBrigLinkage"; + int NONE = 0; + int PROGRAM = 1; + int MODULE = 2; + int FUNCTION = 3; + int ARG = 4; +} + +def BrigMachineModel : Operand { + let PrintMethod = "printBrigMachineModel"; + int SMALL = 0; + int LARGE = 1; + int UNDEF = 2; +} + +def BrigMemoryModifierMask : Operand { + let PrintMethod = "printBrigMemoryModifierMask"; + int CONST = 1; +} + +def BrigMemoryOrder : Operand { + let PrintMethod = "printBrigMemoryOrder"; + int NONE = 0; + int RELAXED = 1; + int SC_ACQUIRE = 2; + int SC_RELEASE = 3; + int SC_ACQUIRE_RELEASE = 4; + int LAST = 5; +} + +def BrigMemoryScope : Operand { + let PrintMethod = "printBrigMemoryScope"; + int NONE = 0; + int WORKITEM = 1; + int WAVEFRONT = 2; + int WORKGROUP = 3; + int AGENT = 4; + int SYSTEM = 5; + int LAST = 6; +} + +def BrigOpcode : Operand { + let PrintMethod = "printBrigOpcode"; + int NOP = 0; + int ABS = 1; + int ADD = 2; + int BORROW = 3; + int CARRY = 4; + int CEIL = 5; + int COPYSIGN = 6; + int DIV = 7; + int FLOOR = 8; + int FMA = 9; + int FRACT = 10; + int MAD = 11; + int MAX = 12; + int MIN = 13; + int MUL = 14; + int MULHI = 15; + int NEG = 16; + int REM = 17; + int RINT = 18; + int SQRT = 19; + int SUB = 20; + int TRUNC = 21; + int MAD24 = 22; + int MAD24HI = 23; + int MUL24 = 24; + int MUL24HI = 25; + int SHL = 26; + int SHR = 27; + int AND = 28; + int NOT = 29; + int OR = 30; + int POPCOUNT = 31; + int XOR = 32; + int BITEXTRACT = 33; + int BITINSERT = 34; + int BITMASK = 35; + int BITREV = 36; + int BITSELECT = 37; + int FIRSTBIT = 38; + int LASTBIT = 39; + int COMBINE = 40; + int EXPAND = 41; + int LDA = 42; + int MOV = 43; + int SHUFFLE = 44; + int UNPACKHI = 45; + int UNPACKLO = 46; + int PACK = 47; + int UNPACK = 48; + int CMOV = 49; + int CLASS = 50; + int NCOS = 51; + int NEXP2 = 52; + int NFMA = 53; + int NLOG2 = 54; + int NRCP = 55; + int NRSQRT = 56; + int NSIN = 57; + int NSQRT = 58; + int BITALIGN = 59; + int BYTEALIGN = 60; + int PACKCVT = 61; + int UNPACKCVT = 62; + int LERP = 63; + int SAD = 64; + int SADHI = 65; + int SEGMENTP = 66; + int FTOS = 67; + int STOF = 68; + int CMP = 69; + int CVT = 70; + int LD = 71; + int ST = 72; + int ATOMIC = 73; + int ATOMICNORET = 74; + int SIGNAL = 75; + int SIGNALNORET = 76; + int MEMFENCE = 77; + int RDIMAGE = 78; + int LDIMAGE = 79; + int STIMAGE = 80; + int IMAGEFENCE = 81; + int QUERYIMAGE = 82; + int QUERYSAMPLER = 83; + int CBR = 84; + int BR = 85; + int SBR = 86; + int BARRIER = 87; + int WAVEBARRIER = 88; + int ARRIVEFBAR = 89; + int INITFBAR = 90; + int JOINFBAR = 91; + int LEAVEFBAR = 92; + int RELEASEFBAR = 93; + int WAITFBAR = 94; + int LDF = 95; + int ACTIVELANECOUNT = 96; + int ACTIVELANEID = 97; + int ACTIVELANEMASK = 98; + int ACTIVELANEPERMUTE = 99; + int CALL = 100; + int SCALL = 101; + int ICALL = 102; + int RET = 103; + int ALLOCA = 104; + int CURRENTWORKGROUPSIZE = 105; + int CURRENTWORKITEMFLATID = 106; + int DIM = 107; + int GRIDGROUPS = 108; + int GRIDSIZE = 109; + int PACKETCOMPLETIONSIG = 110; + int PACKETID = 111; + int WORKGROUPID = 112; + int WORKGROUPSIZE = 113; + int WORKITEMABSID = 114; + int WORKITEMFLATABSID = 115; + int WORKITEMFLATID = 116; + int WORKITEMID = 117; + int CLEARDETECTEXCEPT = 118; + int GETDETECTEXCEPT = 119; + int SETDETECTEXCEPT = 120; + int ADDQUEUEWRITEINDEX = 121; + int CASQUEUEWRITEINDEX = 122; + int LDQUEUEREADINDEX = 123; + int LDQUEUEWRITEINDEX = 124; + 
int STQUEUEREADINDEX = 125; + int STQUEUEWRITEINDEX = 126; + int CLOCK = 127; + int CUID = 128; + int DEBUGTRAP = 129; + int GROUPBASEPTR = 130; + int KERNARGBASEPTR = 131; + int LANEID = 132; + int MAXCUID = 133; + int MAXWAVEID = 134; + int NULLPTR = 135; + int WAVEID = 136; + int FIRST_USER_DEFINED = 32768; + int GCNMADU = 32768; + int GCNMADS = 32769; + int GCNMAX3 = 32770; + int GCNMIN3 = 32771; + int GCNMED3 = 32772; + int GCNFLDEXP = 32773; + int GCNFREXP_EXP = 32774; + int GCNFREXP_MANT = 32775; + int GCNTRIG_PREOP = 32776; + int GCNBFM = 32777; + int GCNLD = 32778; + int GCNST = 32779; + int GCNATOMIC = 32780; + int GCNATOMICNORET = 32781; + int GCNSLEEP = 32782; + int GCNPRIORITY = 32783; + int GCNREGIONALLOC = 32784; + int GCNMSAD = 32785; + int GCNQSAD = 32786; + int GCNMQSAD = 32787; + int GCNMQSAD4 = 32788; + int GCNSADW = 32789; + int GCNSADD = 32790; + int GCNCONSUME = 32791; + int GCNAPPEND = 32792; + int GCNB4XCHG = 32793; + int GCNB32XCHG = 32794; + int GCNMAX = 32795; + int GCNMIN = 32796; + int GCNDIVRELAXED = 32797; + int GCNDIVRELAXEDNARROW = 32798; +} + +def BrigPack : Operand { + let PrintMethod = "printBrigPack"; + int NONE = 0; + int PP = 1; + int PS = 2; + int SP = 3; + int SS = 4; + int S = 5; + int P = 6; + int PPSAT = 7; + int PSSAT = 8; + int SPSAT = 9; + int SSSAT = 10; + int SSAT = 11; + int PSAT = 12; +} + +def BrigProfile : Operand { + let PrintMethod = "printBrigProfile"; + int BASE = 0; + int FULL = 1; + int UNDEF = 2; +} + +def BrigRound : Operand { + let PrintMethod = "printBrigRound"; + int NONE = 0; + int FLOAT_DEFAULT = 1; + int FLOAT_NEAR_EVEN = 2; + int FLOAT_ZERO = 3; + int FLOAT_PLUS_INFINITY = 4; + int FLOAT_MINUS_INFINITY = 5; + int INTEGER_NEAR_EVEN = 6; + int INTEGER_ZERO = 7; + int INTEGER_PLUS_INFINITY = 8; + int INTEGER_MINUS_INFINITY = 9; + int INTEGER_NEAR_EVEN_SAT = 10; + int INTEGER_ZERO_SAT = 11; + int INTEGER_PLUS_INFINITY_SAT = 12; + int INTEGER_MINUS_INFINITY_SAT = 13; + int INTEGER_SIGNALING_NEAR_EVEN = 14; + int INTEGER_SIGNALING_ZERO = 15; + int INTEGER_SIGNALING_PLUS_INFINITY = 16; + int INTEGER_SIGNALING_MINUS_INFINITY = 17; + int INTEGER_SIGNALING_NEAR_EVEN_SAT = 18; + int INTEGER_SIGNALING_ZERO_SAT = 19; + int INTEGER_SIGNALING_PLUS_INFINITY_SAT = 20; + int INTEGER_SIGNALING_MINUS_INFINITY_SAT = 21; +} + +def BrigSamplerAddressing : Operand { + let PrintMethod = "printBrigSamplerAddressing"; + int UNDEFINED = 0; + int CLAMP_TO_EDGE = 1; + int CLAMP_TO_BORDER = 2; + int REPEAT = 3; + int MIRRORED_REPEAT = 4; + int FIRST_USER_DEFINED = 128; +} + +def BrigSamplerCoordNormalization : Operand { + let PrintMethod = "printBrigSamplerCoordNormalization"; + int UNNORMALIZED = 0; + int NORMALIZED = 1; +} + +def BrigSamplerFilter : Operand { + let PrintMethod = "printBrigSamplerFilter"; + int NEAREST = 0; + int LINEAR = 1; + int FIRST_USER_DEFINED = 128; +} + +def BrigSamplerQuery : Operand { + let PrintMethod = "printBrigSamplerQuery"; + int ADDRESSING = 0; + int COORD = 1; + int FILTER = 2; +} + +def BrigSegCvtModifierMask : Operand { + let PrintMethod = "printBrigSegCvtModifierMask"; + int NONULL = 1; +} + +def BrigSegment : Operand { + let PrintMethod = "printBrigSegment"; + int NONE = 0; + int FLAT = 1; + int GLOBAL = 2; + int READONLY = 3; + int KERNARG = 4; + int GROUP = 5; + int PRIVATE = 6; + int SPILL = 7; + int ARG = 8; + int FIRST_USER_DEFINED = 128; + int AMD_GCN = 9; +} + +def AddressSpace : Operand { + let PrintMethod = "printBrigSegment"; + int PRIVATE = 0; + int GLOBAL = 1; + int READONLY = 2; + int GROUP = 3; + int 
FLAT = 4; + int REGION = 5; + int SPILL = 6; + int KERNARG = 7; + int ARG = 8; +} + +def BrigType : Operand { + let PrintMethod = "printBrigType"; + int NONE = 0; + int U8 = 1; + int U16 = 2; + int U32 = 3; + int U64 = 4; + int S8 = 5; + int S16 = 6; + int S32 = 7; + int S64 = 8; + int F16 = 9; + int F32 = 10; + int F64 = 11; + int B1 = 12; + int B8 = 13; + int B16 = 14; + int B32 = 15; + int B64 = 16; + int B128 = 17; + int SAMP = 18; + int ROIMG = 19; + int WOIMG = 20; + int RWIMG = 21; + int SIG32 = 22; + int SIG64 = 23; + int U8X4 = 33; + int U8X8 = 65; + int U8X16 = 97; + int U16X2 = 34; + int U16X4 = 66; + int U16X8 = 98; + int U32X2 = 67; + int U32X4 = 99; + int U64X2 = 100; + int S8X4 = 37; + int S8X8 = 69; + int S8X16 = 101; + int S16X2 = 38; + int S16X4 = 70; + int S16X8 = 102; + int S32X2 = 71; + int S32X4 = 103; + int S64X2 = 104; + int F16X2 = 41; + int F16X4 = 73; + int F16X8 = 105; + int F32X2 = 74; + int F32X4 = 106; + int F64X2 = 107; + int U8_ARRAY = 129; + int U16_ARRAY = 130; + int U32_ARRAY = 131; + int U64_ARRAY = 132; + int S8_ARRAY = 133; + int S16_ARRAY = 134; + int S32_ARRAY = 135; + int S64_ARRAY = 136; + int F16_ARRAY = 137; + int F32_ARRAY = 138; + int F64_ARRAY = 139; + int B8_ARRAY = 141; + int B16_ARRAY = 142; + int B32_ARRAY = 143; + int B64_ARRAY = 144; + int B128_ARRAY = 145; + int SAMP_ARRAY = 146; + int ROIMG_ARRAY = 147; + int WOIMG_ARRAY = 148; + int RWIMG_ARRAY = 149; + int SIG32_ARRAY = 150; + int SIG64_ARRAY = 151; + int U8X4_ARRAY = 161; + int U8X8_ARRAY = 193; + int U8X16_ARRAY = 225; + int U16X2_ARRAY = 162; + int U16X4_ARRAY = 194; + int U16X8_ARRAY = 226; + int U32X2_ARRAY = 195; + int U32X4_ARRAY = 227; + int U64X2_ARRAY = 228; + int S8X4_ARRAY = 165; + int S8X8_ARRAY = 197; + int S8X16_ARRAY = 229; + int S16X2_ARRAY = 166; + int S16X4_ARRAY = 198; + int S16X8_ARRAY = 230; + int S32X2_ARRAY = 199; + int S32X4_ARRAY = 231; + int S64X2_ARRAY = 232; + int F16X2_ARRAY = 169; + int F16X4_ARRAY = 201; + int F16X8_ARRAY = 233; + int F32X2_ARRAY = 202; + int F32X4_ARRAY = 234; + int F64X2_ARRAY = 235; + int INVALID = -1; +} + +def BrigVariableModifierMask : Operand { + let PrintMethod = "printBrigVariableModifierMask"; + int DEFINITION = 1; + int CONST = 2; +} + +def BrigWidth : Operand { + let PrintMethod = "printBrigWidth"; + int _NONE = 0; + int _1 = 1; + int _2 = 2; + int _4 = 3; + int _8 = 4; + int _16 = 5; + int _32 = 6; + int _64 = 7; + int _128 = 8; + int _256 = 9; + int _512 = 10; + int _1024 = 11; + int _2048 = 12; + int _4096 = 13; + int _8192 = 14; + int _16384 = 15; + int _32768 = 16; + int _65536 = 17; + int _131072 = 18; + int _262144 = 19; + int _524288 = 20; + int _1048576 = 21; + int _2097152 = 22; + int _4194304 = 23; + int _8388608 = 24; + int _16777216 = 25; + int _33554432 = 26; + int _67108864 = 27; + int _134217728 = 28; + int _268435456 = 29; + int _536870912 = 30; + int _1073741824 = 31; + int _2147483648 = 32; + int _WAVESIZE = 33; + int _ALL = 34; + int _LAST = 35; +} + +} Index: lib/Target/HSAIL/HSAILFrameLowering.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILFrameLowering.h @@ -0,0 +1,45 @@ +//=-- HSAILFrameLowering.h - Define HSAIL frame lowering ---------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This class implements HSAIL-specific bits of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#ifndef LIB_TARGET_HSAIL_HSAILFRAMELOWERING_H +#define LIB_TARGET_HSAIL_HSAILFRAMELOWERING_H + +#include "llvm/Target/TargetFrameLowering.h" + + +namespace llvm { + +class HSAILFrameLowering : public TargetFrameLowering { +public: + explicit HSAILFrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1) + : TargetFrameLowering(D, StackAl, LAO, TransAl) {} + + void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const override {}; + + void emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const override{} + + bool hasFP(const MachineFunction &MF) const override { return false; } + + int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + + void processFunctionBeforeFrameFinalized( + MachineFunction &F, + RegScavenger *RS = nullptr) const override; +}; + +} // End llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILFrameLowering.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILFrameLowering.cpp @@ -0,0 +1,105 @@ +//===-- HSAILFrameLowering.cpp --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILFrameLowering.h" +#include "HSAIL.h" +#include "HSAILInstrInfo.h" +#include "HSAILMachineFunctionInfo.h" + +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" + +using namespace llvm; + + +int HSAILFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + return MF.getFrameInfo()->getObjectOffset(FI); +} + +void HSAILFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + DenseMap SpillMap; + DenseMap PrivateMap; + + int64_t SpillSize = 0; + int64_t PrivateSize = 0; + unsigned SpillAlign = 4; + unsigned PrivateAlign = 4; + + for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd(); + I != E; ++I) { + if (MFI->isDeadObjectIndex(I)) + continue; + + unsigned Size = MFI->getObjectSize(I); + unsigned Align = MFI->getObjectAlignment(I); + unsigned Offset = MFI->getObjectOffset(I); + + assert(Offset == 0 && + "Stack object offsets should be 0 before frame finalized"); + + if (MFI->isSpillSlotObjectIndex(I)) { + // Adjust to alignment boundary. 
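+ // Worked example of the round-up below (illustrative): with SpillSize = 6
+ // and Align = 4, (6 + 4 - 1) / 4 * 4 == 8, i.e. the running size is rounded
+ // up to the next multiple of the object's alignment before the object's
+ // offset is recorded.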
+ SpillSize = (SpillSize + Align - 1) / Align * Align; + SpillMap[I] = SpillSize; // Offset + + SpillSize += Size; + SpillAlign = std::max(SpillAlign, Align); + } else { + PrivateSize = (PrivateSize + Align - 1) / Align * Align; + PrivateMap[I] = PrivateSize; // Offset + + PrivateSize += Size; + PrivateAlign = std::max(PrivateAlign, Align); + } + + MFI->RemoveStackObject(I); + } + + int PrivateIndex = -1; + int SpillIndex = -1; + + if (PrivateSize != 0) + PrivateIndex = MFI->CreateStackObject(PrivateSize, PrivateAlign, false); + + if (SpillSize != 0) + SpillIndex = MFI->CreateSpillStackObject(SpillSize, SpillAlign); + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + unsigned Opc = MI.getOpcode(); + int AddrIdx = HSAIL::getNamedOperandIdx(Opc, HSAIL::OpName::address); + if (AddrIdx == -1) { + // All instructions capable of having a FrameIndex should have an + // address operand. + continue; + } + + MachineOperand &Base = MI.getOperand(AddrIdx); + if (!Base.isFI()) + continue; + + int Index = Base.getIndex(); + MachineOperand &Offset = MI.getOperand(AddrIdx + 2); + int64_t OrigOffset = Offset.getImm(); + + if (MFI->isSpillSlotObjectIndex(Index)) { + Base.setIndex(SpillIndex); + Offset.setImm(SpillMap[Index] + OrigOffset); + } else { + Base.setIndex(PrivateIndex); + Offset.setImm(PrivateMap[Index] + OrigOffset); + } + } + } +} Index: lib/Target/HSAIL/HSAILFusion.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILFusion.td @@ -0,0 +1,532 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Fused instruction patterns +// +// This file contains optimizations rather than definitions +// essential for code generation.
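+// For instance (a sketch, assuming the EnableFPMAD patterns later in this
+// file are selected to HSAIL nfma): with fp-contraction enabled,
+//   %t = fmul float %a, %b
+//   %r = fadd float %t, %c
+// can be matched as a single "nfma_f32 $s0, $s1, $s2, $s3;" instead of a
+// separate mul_f32 + add_f32 pair.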
+// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// llc command line options predicates +def EnableFPMAD : Predicate<"CurDAG->getTarget().Options.LessPreciseFPMAD()">; +def UnsafeFPMath : Predicate<"CurDAG->getTarget().Options.UnsafeFPMath">; +def NoInfsFPMath : Predicate<"CurDAG->getTarget().Options.NoInfsFPMath">; +def NoNaNsFPMath : Predicate<"CurDAG->getTarget().Options.NoNaNsFPMath">; + +//////////////////////////////////////////////////////////////////////////////// +// fused operation multiclasses +multiclass TernaryFusedPairOp { + def _rrr : Pat< + (op1 (op2 RC:$src0, RC:$src1), RC:$src2), + (inst RC:$src0, RC:$src1, RC:$src2, bt) + >; + + def _rri : Pat< + (op1 (op2 RC:$src0, RC:$src1), (Ty ImmTy:$src2)), + (inst RC:$src0, RC:$src1, ImmTy:$src2, bt) + >; + + def _rir : Pat< + (op1 (op2 RC:$src0, (Ty ImmTy:$src1)), RC:$src2), + (inst RC:$src0, ImmTy:$src1, RC:$src2, bt) + >; + + def _rii : Pat< + (op1 (op2 RC:$src0, (Ty ImmTy:$src1)), (Ty ImmTy:$src2)), + (inst RC:$src0, ImmTy:$src1, ImmTy:$src2, bt) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// fused multiply-add +def getShiftMult32 : SDNodeXFormgetTargetConstant(1u << (N->getZExtValue()), SDLoc(N), MVT::i32); +}]>; + +def getShiftMult64 : SDNodeXFormgetTargetConstant(((uint64_t)1u) << (N->getZExtValue()), + SDLoc(N), MVT::i64); +}]>; + +def shl32imm : PatLeaf<(imm), [{ + return N->getZExtValue() < 32; +}], getShiftMult32>; + +def shl64imm : PatLeaf<(imm), [{ + return N->getZExtValue() < 64; +}], getShiftMult64>; + + +// Pre-optimized (const1 + (x << const2)) -> const1 | (x << const2). +// This requires mutual check of const1 and const2 to ensure last bit# set in +// const1 < const2 (i.e. or can be changed to add). +def orShlAsMad : PatFrag< + (ops node:$reg, node:$shift, node:$mask), + (or (shl node:$reg, node:$shift), node:$mask), [{ + ConstantSDNode *CNShift, *CNMask; + if ((CNShift = dyn_cast(N->getOperand(0)->getOperand(1))) && + (CNMask = dyn_cast(N->getOperand(1)))) { + return (CNMask->getZExtValue() >> CNShift->getZExtValue()) == 0; + } + return false; +}]>; + +// 32 bit integer multiplication is an expensive operation on current HW; +// the cost of a 32 bit mul is 4 times higher than the cost of an add. Therefore +// shift + add fusion is commented out as not beneficial at the moment. +// defm m2ad_u32 : TernaryFusedShlAdd<"mad_u32", i32, i32imm, shl32imm, GPR32>; +// +// There are no 64 bit muls and mads in SI, but there are 64 bit shifts and +// adds. Reversing shifts from the multiplier is an unneeded burden for SC. +// Therefore folding of shift + add is commented out for 64 bit ops unless we +// have HW supporting 64 bit mads. +// +// let Predicates = [EnableOpt] in { +// defm m2ad_u64 : TernaryFusedShlAdd<"mad_u64", i64, i64imm, shl64imm, GPR64>; +// } + +// We do not define 64 bit const1 | (x << const2) folding, as we have 64 bit +// or and shift, but no 64 bit mad. +// As 32 bit integer multiplication is currently expensive, the optimization is +// commented out.
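+// Worked example of the or-as-add shape above (illustrative): (x << 4) has
+// its low 4 bits clear, so for any mask m with (m >> 4) == 0,
+//   ((x << 4) | m) == ((x << 4) + m) == x * 16 + m,
+// which is exactly the a * b + c form a mad_u32 could absorb if the 32 bit
+// multiply were cheap enough to make the fusion worthwhile.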
+// let Predicates = [EnableOpt] in { +// def mor_u32 : Pat<(orShlAsMad GPR32:$src0, +// (i32 shl32imm:$src1), (i32 imm:$src2)), +// (umad_rii_u32 GPR32:$src0, shl32imm:$src1, imm:$src2)>; +// } + +let Predicates = [EnableFPMAD] in { + defm : TernaryFusedPairOp; + defm : TernaryFusedPairOp; +} + +//////////////////////////////////////////////////////////////////////////////// +// bit strings +def imm31 : PatLeaf<(imm), [{ + return N->getZExtValue() == 31; +}]>; + +def imm32 : PatLeaf<(imm), [{ + return N->getZExtValue() == 32; +}]>; + +def imm63 : PatLeaf<(imm), [{ + return N->getZExtValue() == 63; +}]>; + +// // Shifts do not need "and {31|63}, shift-bits". +// multiclass ShrOp { + +// def _rr : Pat<(op RC:$src0, (and GPR32:$src1, (i32 ShImm))), +// (!cast(asm##t) RC:$src0, GPR32:$src1)>; + +// def _ir : Pat<(op (Ty imm:$src0), (and GPR32:$src1, (i32 ShImm))), +// (!cast(asm#"_ir"#t) imm:$src0, GPR32:$src1)>; +// } + +// let Predicates = [EnableOpt] in { +// defm shr_u32 : ShrOp<"shr", "_u32", srl, GPR32, i32, imm31>; +// defm shr_s32 : ShrOp<"shr", "_s32", sra, GPR32, i32, imm31>; +// defm shl_u32 : ShrOp<"shl", "_u32", shl, GPR32, i32, imm31>; +// defm shr_u64 : ShrOp<"shr", "_u64", srl, GPR64, i64, imm63>; +// defm shr_s64 : ShrOp<"shr", "_s64", sra, GPR64, i64, imm63>; +// defm shl_u64 : ShrOp<"shl", "_u64", shl, GPR64, i64, imm63>; +// } + +def popCnt : SDNodeXFormgetTargetConstant(countPopulation(N->getZExtValue()), + SDLoc(N), MVT::i32); +}]>; + +def isMask : PatLeaf<(imm), [{ + return isMask_64(N->getZExtValue()); +}]>; + +// Extract masks like (val & 0b0001111000) >> 3 +// Most common use looks like: (x & 0xFF00) >> 8 +class BitExtractOp : Pat< + (and (srl Ty:$src0, (i32 (GPROrImm i32:$src1))), (Ty isMask:$src2)), + (bitextractInst $src0, $src1, (i32 (popCnt $src2)), bt) +>; + +// No signed extract operations are defined since HSAIL specifies extract as +// left + right shifts rather than right shift + and +let Predicates = [EnableOpt], AddedComplexity = 10 in { + def : BitExtractOp; + def : BitExtractOp; +} + +// BFI +def bfiImmIRR : PatFrag< + (ops node:$src0, node:$src1, node:$src2, node:$src4), + (or (and node:$src1, node:$src0), (and node:$src2, node:$src4)), [{ + // check if src1 == ~src4 + ConstantSDNode *CN1, *CN2; + if ((CN1 = dyn_cast(N->getOperand(0)->getOperand(1))) && + (CN2 = dyn_cast(N->getOperand(1)->getOperand(1)))) { + return (CN1->getSExtValue() == ~(CN2->getSExtValue())); + } + return false; +}]>; + +def bfiImmIIR3 : PatFrag< + (ops node:$src0, node:$src1, node:$src2), + (xor (xor node:$src2, (and node:$src2, node:$src0)), node:$src1), [{ + // Check if src1 & src0 == src1. + ConstantSDNode *CN1, *CN2; + if ((CN1 = dyn_cast(N->getOperand(0)->getOperand(1)-> + getOperand(1))) && + (CN2 = dyn_cast(N->getOperand(1)))) { + uint64_t c2 = CN2->getZExtValue(); + return (CN1->getZExtValue() & c2) == c2; + } + return false; +}]>; + +// FIXME: These patterns are pretty fragile and break by commuting +// operands of sources. Many fo them fail on canonicalized IR for the +// pattern they match. 
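+// For reference (derived from the patterns below): HSAIL bitselect computes
+//   dst = (src0 & src1) | (~src0 & src2),
+// so equivalent DAG shapes include, for example,
+//   (a & m) | (b & ~m)        -> bitselect m, a, b
+//   b ^ (m & (a ^ b))         -> bitselect m, a, b
+// and commuting the operands of the inner and/xor nodes produces variants
+// that the patterns below do not currently cover.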
+multiclass BitSelect { + def _rrr : Pat< + (or (and Ty:$src0, Ty:$src1), + (and Ty:$src2, (not Ty:$src0))), + (bitselectInst $src0, $src1, $src2, bt) + >; + + def _irr : Pat< + (bfiImmIRR (Ty imm:$src0), Ty:$src1, Ty:$src2, (Ty imm)), (bitselectInst imm:$src0, $src1, $src2, bt) + >; + + def _rir : Pat< + (or (and Ty:$src0, (Ty imm:$src1)), (and Ty:$src2, (not Ty:$src0))), + (bitselectInst $src0, imm:$src1, $src2, bt) + >; + + def _rii : Pat< + (or (and Ty:$src0, (Ty imm:$src1)), + (and (not Ty:$src0), (Ty imm:$src2))), + (bitselectInst $src0, imm:$src1, imm:$src2, bt) + >; + + // Alternative rii pattern: (src0 & src1) | ((src0 & src2) ^ src2) + def _rii1 : Pat< + (or (and Ty:$src0, (Ty imm:$src1)), + (xor (and Ty:$src0, (Ty imm:$src2)), (Ty imm:$src2))), + (bitselectInst $src0, imm:$src1, imm:$src2, bt) + >; + + def _rri : Pat< + (or (and Ty:$src0, Ty:$src1), + (and (not Ty:$src0), (Ty imm:$src2))), + (bitselectInst $src0, $src1, imm:$src2, bt) + >; + + // Alternative rri pattern: (src0 & src1) | ((src0 & src2) ^ src2) + def _rri1 : Pat< + (or (and Ty:$src0, Ty:$src1), + (xor (and Ty:$src0, (Ty imm:$src2)), (Ty imm:$src2))), + (bitselectInst $src0, $src1, imm:$src2, bt) + >; + + // Alternative pattern: (src2 ^ (src0 & (src1 ^ src2))) + let AddedComplexity = 10 in { + def _rrr2 : Pat< + (xor Ty:$src2, (and Ty:$src0, (xor Ty:$src1, Ty:$src2))), + (bitselectInst $src0, $src1, $src2, bt) + >; + } + + let AddedComplexity = 11 in { + // XXX - This is higher priority to fold the immediate. + def _irr2 : Pat< + (xor Ty:$src2, (and (xor Ty:$src1, Ty:$src2), imm:$src0)), + (bitselectInst imm:$src0, $src1, $src2, bt) + >; + + def _iir2 : Pat< + (xor Ty:$src2, (and (xor Ty:$src2, (Ty imm:$src1)), (Ty imm:$src0))), + (bitselectInst imm:$src0, imm:$src1, $src2, bt) + >; + + def _rir2 : Pat< + (xor Ty:$src2, (and Ty:$src0, (xor Ty:$src2, (Ty imm:$src1)))), + (bitselectInst $src0, imm:$src1, $src2, bt) + >; + + def _rri2 : Pat< + (xor (and Ty:$src0, (xor Ty:$src1, (Ty imm:$src2))), (Ty imm:$src2)), + (bitselectInst $src0, $src1, imm:$src2, bt) + >; + } + + // Alternative pattern: ((src0 & src2) ^ src2) ^ (src0 & src1) + let AddedComplexity = 4 in { + def _rrr3 : Pat< + (xor (xor Ty:$src2, (and Ty:$src0, Ty:$src2)), (and Ty:$src0, Ty:$src1)), + (bitselectInst $src0, $src1, $src2, bt) + >; + } + + let AddedComplexity = 5 in { + def _irr3 : Pat< + (xor (xor Ty:$src2, (and Ty:$src2, (Ty imm:$src0))), + (and Ty:$src1, (Ty imm:$src0))), + (bitselectInst imm:$src0, $src1, $src2, bt) + >; + + def _iir3 : Pat< + (bfiImmIIR3 (Ty imm:$src0), (Ty imm:$src1), Ty:$src2), + (bitselectInst imm:$src0, imm:$src1, $src2, bt) + >; + } + + def _rri3 : Pat< + (xor (xor (and Ty:$src0, (Ty imm:$src2)), (Ty imm:$src2)), + (and Ty:$src0, Ty:$src1)), + (bitselectInst $src0, $src1, imm:$src2, bt) + >; + + def _rii3 : Pat< + (xor (xor (and Ty:$src0, (Ty imm:$src2)), (Ty imm:$src2)), + (and Ty:$src0, (Ty imm:$src1))), + (bitselectInst $src0, imm:$src1, imm:$src2, bt) + >; +} + +let Predicates = [EnableOpt] in { +defm : BitSelect; +defm : BitSelect; +} + +// pack + +let Predicates = [EnableOpt], AddedComplexity = 5 in { + def : Pat< + (shl (i64 (anyext i32:$src)), (i32 32)), + (PACK_U32X2_U32 (i64 0), $src, (i32 1), BrigType.U32X2, BrigType.U32) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// reciprocal + +def fp32imm1 : PatLeaf<(f32 fpimm), [{ + return N->isExactlyValue(+1.0); +}]>; + +def fp64imm1 : PatLeaf<(f64 fpimm), [{ + return N->isExactlyValue(+1.0); +}]>; + +def fp32imm_minus1 : 
PatLeaf<(f32 fpimm), [{ + return N->isExactlyValue(-1.0); +}]>; + +def fp64imm_minus1 : PatLeaf<(f64 fpimm), [{ + return N->isExactlyValue(-1.0); +}]>; + +let Predicates = [UnsafeFPMath] in { + // Pure 1.0 / x + let AddedComplexity = 5 in { + def : Pat< + (fdiv fp32imm1, f32:$src), + (NRCP_F32 $src, BrigType.F32) + >; + + def : Pat< + (fdiv fp64imm1, f64:$src), + (NRCP_F64 $src, BrigType.F64) + >; + } + + // -1.0 / x + let AddedComplexity = 4 in { + def : Pat< + (fdiv fp32imm_minus1, f32:$src), + (NEG_F32 (f32 (NRCP_F32 $src, BrigType.F32)), BrigType.F32) + >; + + def : Pat< + (fdiv fp64imm_minus1, f64:$src), + (NEG_F64 (f64 (NRCP_F64 $src, BrigType.F64)), BrigType.F64) + >; + } + + let AddedComplexity = 5 in { + def : Pat< + (fdiv fp32imm_minus1, (fneg f32:$src)), + (NRCP_F32 $src, BrigType.F32) + >; + + def : Pat< + (fdiv fp64imm_minus1, (fneg f64:$src)), + (NRCP_F64 $src, BrigType.F64) + >; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// rsqrt + +let Predicates = [UnsafeFPMath] in { + // Pure 1.0 / sqrt(x) + let AddedComplexity = 15 in { + def : Pat< + (fdiv fp32imm1, (int_HSAIL_nsqrt_f32 f32:$src)), + (NRSQRT_F32 $src, BrigType.F32) + >; + + def : Pat< + (fdiv fp64imm1, (int_HSAIL_nsqrt_f64 f64:$src)), + (NRSQRT_F64 $src, BrigType.F64) + >; + } + + let AddedComplexity = 10 in { + def : Pat< + (fdiv f32:$src0, (int_HSAIL_nsqrt_f32 f32:$src1)), + (f32 (MUL_F32 1, 0, $src0, (f32 (NRSQRT_F32 $src1, BrigType.F32)), BrigType.F32)) + >; + + def : Pat< + (f32 (fdiv fpimm:$src0, (int_HSAIL_nsqrt_f32 f32:$src1))), + (f32 (MUL_F32 1, 0, fpimm:$src0, (f32 (NRSQRT_F32 $src1, BrigType.F32)), BrigType.F32)) + >; + + def : Pat< + (f64 (fdiv GPR64:$src0, (int_HSAIL_nsqrt_f64 f64:$src1))), + (f64 (MUL_F64 0, 0, $src0, (f64 (NRSQRT_F64 $src1, BrigType.F64)), BrigType.F64)) + >; + + def : Pat< + (f64 (fdiv fpimm:$src0, (int_HSAIL_nsqrt_f64 f64:$src1))), + (f64 (MUL_F64 0, 0, fpimm:$src0, (f64 (NRSQRT_F64 (f64 GPR64:$src1), BrigType.F64)), BrigType.F64)) + >; + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Min/Max + +// A 'setcc' node with a single use. +def setcc_su : PatFrag<(ops node:$lhs, node:$rhs, node:$cc), + (setcc node:$lhs, node:$rhs, node:$cc), [{ + return N->hasOneUse(); +}]>; + +multiclass minmax { + def : Pat< + (select (i1 (setcc_su Ty:$src0, Ty:$src1, cc12)), Ty:$src0, Ty:$src1), + (inst ftz, round, $src0, $src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su Ty:$src0, ImmTy:$src1, cc12)), Ty:$src0, ImmTy:$src1), + (inst ftz, round, $src0, ImmTy:$src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su ImmTy:$src0, Ty:$src1, cc12)), ImmTy:$src0, Ty:$src1), + (inst ftz, round, ImmTy:$src0, $src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su Ty:$src0, Ty:$src1, cc21)), Ty:$src1, Ty:$src0), + (inst ftz, round, $src0, $src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su Ty:$src0, ImmTy:$src1, cc21)), ImmTy:$src1, Ty:$src0), + (inst ftz, round, $src0, ImmTy:$src1, bt) + >; + + def : Pat< + (select (i1 (setcc_su ImmTy:$src0, Ty:$src1, cc21)), Ty:$src1, ImmTy:$src0), + (inst ftz, round, ImmTy:$src0, $src1, bt) + >; +} + +// TODO: This should be moved to a DAG combine. This currently gets +// confused by canonicalizations of a compare with a constant. le/ge +// comparisons with a constant are canonicalized to lt/gt with the +// constant incremented, which breaks the simple pattern. 
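+// Illustrative example of the problem: a min such as
+//   select (setcc x, 3, setle), x, 3
+// is canonicalized by the DAG combiner into
+//   select (setcc x, 4, setlt), x, 3
+// and once the compare constant (4) no longer matches the select operand (3),
+// the simple patterns below cannot fire.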
+let Predicates = [EnableOpt] in { + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; + defm : minmax; +} + +// Abs +let Predicates = [EnableOpt] in { + def : Pat< + (xor (add (sra i32:$src, (i32 31)), i32:$src), + (sra i32:$src, (i32 31))), + (ABS_S32 0, 0, $src, BrigType.S32) + >; + + def : Pat< + (xor (add (sra i64:$src, (i32 63)), i64:$src), + (sra i64:$src, (i32 63))), + (ABS_S64 0, 0, $src, BrigType.S64) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// fadd y (fmul x, 1) +multiclass FusedAddMul1 { + def _rri : Pat< + (fadd Ty:$src0, (fmul Ty:$src1, one)), + (op ftz, round, $src0, $src1, bt) + >; + + def _iri : Pat< + (fadd (fmul Ty:$src0, one), (Ty fpimm:$src1)), + (op ftz, round, $src0, (Ty fpimm:$src1), bt) + >; +} + +// FIXME: Change to default rounding mode +let Predicates = [EnableOpt] in { + defm addmul1_f32 : FusedAddMul1; + defm addmul1_f64 : FusedAddMul1; +} + +let Predicates = [EnableOpt] in { + def : Pat< + (or (shl i32:$src0, i32:$src1), + (srl i32:$src0, (sub imm32, (and i32:$src1, imm31)))), + (BITALIGN_B32 $src0, $src0, (i32 (NEG_S32 $src1, BrigType.S32)), BrigType.B32) + >; +} Index: lib/Target/HSAIL/HSAILISelDAGToDAG.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILISelDAGToDAG.cpp @@ -0,0 +1,1188 @@ +//=- HSAILISelDAGToDAG.cpp - A DAG pattern matching inst selector for HSAIL -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a DAG pattern matching instruction selector for HSAIL, +// converting from a legalized dag to a HSAIL dag. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsail-isel" +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILInstrInfo.h" +#include "HSAILMachineFunctionInfo.h" +#include "HSAILRegisterInfo.h" +#include "HSAILSubtarget.h" +#include "HSAILTargetMachine.h" +#include "HSAILUtilityFunctions.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Pattern Matcher Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { +void initializeHSAILDAGToDAGISelPass(PassRegistry &); +} + +namespace { +//===--------------------------------------------------------------------===// +/// ISel - HSAIL specific code to select HSAIL machine instructions for +/// SelectionDAG operations. +/// +class HSAILDAGToDAGISel : public SelectionDAGISel { + /// Subtarget - Keep a pointer to the HSAILSubtarget around so that we can + /// make the right decision when generating code for different targets. 
+ const HSAILSubtarget *Subtarget; + +public: + explicit HSAILDAGToDAGISel(TargetMachine &TM) + : SelectionDAGISel(TM), Subtarget(nullptr) {} + + virtual ~HSAILDAGToDAGISel() {} + + const char *getPassName() const override { + return "HSAIL DAG->DAG Instruction Selection"; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + Subtarget = &MF.getSubtarget(); + return SelectionDAGISel::runOnMachineFunction(MF); + } + + bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; + +private: + SDNode *Select(SDNode *N) override; + + SDNode *SelectINTRINSIC_WO_CHAIN(SDNode *Node); + SDNode *SelectINTRINSIC_W_CHAIN(SDNode *Node); + + SDNode *SelectImageIntrinsic(SDNode *Node); + SDNode *SelectActiveLaneMask(SDNode *Node); + // Helper for SelectAddrCommon + // Checks that OR operation is semantically equivalent to ADD + bool IsOREquivalentToADD(SDValue Op) const; + + bool SelectAddrCommon(SDValue Addr, SDValue &Base, SDValue &Reg, + int64_t &Offset, MVT ValueType, int Depth) const; + + bool SelectAddr(SDValue N, SDValue &Base, SDValue &Reg, + SDValue &Offset) const; + + bool SelectLoadAddr(SDNode *ParentLoad, SDValue Addr, SDValue &Base, + SDValue &Reg, SDValue &Offset, SDValue &Segment, + SDValue &Align, SDValue &Type, SDValue &Width, + SDValue &ModifierMask) const; + + bool SelectStoreAddr(SDNode *ParentStore, SDValue Addr, SDValue &Base, + SDValue &Reg, SDValue &Offset, SDValue &Segment, + SDValue &Align, + /*SDValue &Equiv,*/ + SDValue &Type) const; + + bool SelectAtomicAddr(SDNode *ParentLoad, SDValue Addr, SDValue &Segment, + SDValue &Order, SDValue &Scope, SDValue &Equiv, + + SDValue &Base, SDValue &Reg, SDValue &Offset) const; + + void SelectAddrSpaceCastCommon(const AddrSpaceCastSDNode &ASC, + SDValue &NoNull, SDValue &Ptr, + SDValue &DestType, SDValue &SrcType) const; + + SDNode *SelectAddrSpaceCast(AddrSpaceCastSDNode *ASC) const; + + SDNode *SelectSetCC(SDNode *SetCC) const; + + SDNode *SelectArgLd(MemSDNode *SetCC) const; + SDNode *SelectArgSt(MemSDNode *SetCC) const; + + bool SelectGPROrImm(SDValue In, SDValue &Src) const; + bool MemOpHasPtr32(SDNode *N) const; + + bool isKernelFunc(void) const; +// Include the pieces autogenerated from the target description. +#include "HSAILGenDAGISel.inc" +}; +} + +static BrigType getBrigType(MVT::SimpleValueType VT, bool Signed) { + switch (VT) { + case MVT::i32: + return Signed ? BRIG_TYPE_S32 : BRIG_TYPE_U32; + case MVT::f32: + return BRIG_TYPE_F32; + case MVT::i8: + return Signed ? BRIG_TYPE_S8 : BRIG_TYPE_U8; + case MVT::i16: + return Signed ? BRIG_TYPE_S16 : BRIG_TYPE_U16; + case MVT::i64: + return Signed ? 
BRIG_TYPE_S64 : BRIG_TYPE_U64; + case MVT::f64: + return BRIG_TYPE_F64; + case MVT::i1: + return BRIG_TYPE_B1; + default: + llvm_unreachable("Unhandled type for MVT -> BRIG"); + } +} + +static BrigType getBrigTypeFromStoreType(MVT::SimpleValueType VT) { + switch (VT) { + case MVT::i32: + return BRIG_TYPE_U32; + case MVT::f32: + return BRIG_TYPE_F32; + case MVT::i8: + return BRIG_TYPE_U8; + case MVT::i16: + return BRIG_TYPE_U16; + case MVT::i64: + return BRIG_TYPE_U64; + case MVT::f64: + return BRIG_TYPE_F64; + default: + llvm_unreachable("Unhandled type for MVT -> BRIG"); + } +} + +bool HSAILDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, + SDNode *Root) const { + return true; +} + +bool HSAILDAGToDAGISel::SelectGPROrImm(SDValue In, SDValue &Src) const { + if (ConstantSDNode *C = dyn_cast(In)) { + SDLoc SL(In); + Src = CurDAG->getTargetConstant(C->getAPIntValue(), SL, C->getValueType(0)); + } else if (ConstantFPSDNode *C = dyn_cast(In)) { + SDLoc SL(In); + Src = CurDAG->getTargetConstantFP(C->getValueAPF(), SL, C->getValueType(0)); + } else + Src = In; + + return true; +} + +bool HSAILDAGToDAGISel::isKernelFunc() const { + const MachineFunction &MF = CurDAG->getMachineFunction(); + return HSAIL::isKernelFunc(MF.getFunction()); +} + +static unsigned getImageInstr(HSAILIntrinsic::ID intr) { + switch (intr) { + default: + llvm_unreachable("unexpected intrinsinc ID for images"); + case HSAILIntrinsic::HSAIL_rd_imgf_1d_f32: + return HSAIL::rd_imgf_1d_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_1d_s32: + return HSAIL::rd_imgf_1d_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_1da_f32: + return HSAIL::rd_imgf_1da_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_1da_s32: + return HSAIL::rd_imgf_1da_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_2d_f32: + return HSAIL::rd_imgf_2d_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_2d_s32: + return HSAIL::rd_imgf_2d_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_2da_f32: + return HSAIL::rd_imgf_2da_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_2da_s32: + return HSAIL::rd_imgf_2da_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_3d_f32: + return HSAIL::rd_imgf_3d_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_3d_s32: + return HSAIL::rd_imgf_3d_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_1d_f32: + return HSAIL::rd_imgi_1d_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_1d_s32: + return HSAIL::rd_imgi_1d_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_1da_f32: + return HSAIL::rd_imgi_1da_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_1da_s32: + return HSAIL::rd_imgi_1da_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_2d_f32: + return HSAIL::rd_imgi_2d_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_2d_s32: + return HSAIL::rd_imgi_2d_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_2da_f32: + return HSAIL::rd_imgi_2da_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_2da_s32: + return HSAIL::rd_imgi_2da_s32; + case HSAILIntrinsic::HSAIL_rd_imgi_3d_f32: + return HSAIL::rd_imgi_3d_f32; + case HSAILIntrinsic::HSAIL_rd_imgi_3d_s32: + return HSAIL::rd_imgi_3d_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_1d_f32: + return HSAIL::rd_imgui_1d_f32; + case HSAILIntrinsic::HSAIL_rd_imgui_1d_s32: + return HSAIL::rd_imgui_1d_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_1da_f32: + return HSAIL::rd_imgui_1da_f32; + case HSAILIntrinsic::HSAIL_rd_imgui_1da_s32: + return HSAIL::rd_imgui_1da_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_2d_f32: + return HSAIL::rd_imgui_2d_f32; + case HSAILIntrinsic::HSAIL_rd_imgui_2d_s32: + return HSAIL::rd_imgui_2d_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_2da_f32: + return HSAIL::rd_imgui_2da_f32; + case 
HSAILIntrinsic::HSAIL_rd_imgui_2da_s32: + return HSAIL::rd_imgui_2da_s32; + case HSAILIntrinsic::HSAIL_rd_imgui_3d_f32: + return HSAIL::rd_imgui_3d_f32; + case HSAILIntrinsic::HSAIL_rd_imgui_3d_s32: + return HSAIL::rd_imgui_3d_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_f32: + return HSAIL::rd_imgf_2ddepth_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_s32: + return HSAIL::rd_imgf_2ddepth_s32; + case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_f32: + return HSAIL::rd_imgf_2dadepth_f32; + case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_s32: + return HSAIL::rd_imgf_2dadepth_s32; + + case HSAILIntrinsic::HSAIL_ld_imgf_1d_u32: + return HSAIL::ld_imgf_1d_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_1da_u32: + return HSAIL::ld_imgf_1da_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_1db_u32: + return HSAIL::ld_imgf_1db_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_2d_u32: + return HSAIL::ld_imgf_2d_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_2da_u32: + return HSAIL::ld_imgf_2da_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_3d_u32: + return HSAIL::ld_imgf_3d_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_1d_u32: + return HSAIL::ld_imgi_1d_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_1da_u32: + return HSAIL::ld_imgi_1da_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_1db_u32: + return HSAIL::ld_imgi_1db_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_2d_u32: + return HSAIL::ld_imgi_2d_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_2da_u32: + return HSAIL::ld_imgi_2da_u32; + case HSAILIntrinsic::HSAIL_ld_imgi_3d_u32: + return HSAIL::ld_imgi_3d_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_1d_u32: + return HSAIL::ld_imgui_1d_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_1da_u32: + return HSAIL::ld_imgui_1da_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_1db_u32: + return HSAIL::ld_imgui_1db_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_2d_u32: + return HSAIL::ld_imgui_2d_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_2da_u32: + return HSAIL::ld_imgui_2da_u32; + case HSAILIntrinsic::HSAIL_ld_imgui_3d_u32: + return HSAIL::ld_imgui_3d_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_2ddepth_u32: + return HSAIL::ld_imgf_2ddepth_u32; + case HSAILIntrinsic::HSAIL_ld_imgf_2dadepth_u32: + return HSAIL::ld_imgf_2dadepth_u32; + } +} + +SDNode *HSAILDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *Node) { + unsigned IntID = cast(Node->getOperand(0))->getZExtValue(); + switch (IntID) { + case HSAILIntrinsic::HSAIL_ftz_f32: { + SDLoc SL(Node); + + // This is a workaround for not being able to create fpimm in an output + // pattern. + const SDValue Ops[] = { + CurDAG->getTargetConstant(1, SL, MVT::i1), // ftz + CurDAG->getTargetConstant(BRIG_ROUND_FLOAT_DEFAULT, SL, MVT::i32), // round + Node->getOperand(1), // src0 + CurDAG->getConstantFP(0.0, SL, MVT::f32), // src1 + CurDAG->getTargetConstant(BRIG_TYPE_F32, SL, MVT::i32) // TypeLength + }; + + return CurDAG->SelectNodeTo(Node, HSAIL::ADD_F32, MVT::f32, Ops); + } + case HSAILIntrinsic::HSAIL_mul_ftz_f32: { + SDLoc SL(Node); + + // This is a workaround for not being able to create fpimm in an output + // pattern. 
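+ // Illustrative note: as with HSAIL_ftz_f32 above (an ftz add of 0.0), the
+ // intrinsic is lowered to a multiply by the identity element, roughly
+ // "mul_ftz_f32 dst, src, 1.0" (0x3f800000 is the IEEE-754 bit pattern of
+ // 1.0f), so the value is unchanged while the ftz modifier flushes denormals.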
+ const SDValue Ops[] = { + CurDAG->getTargetConstant(1, SL, MVT::i1), // ftz + CurDAG->getTargetConstant(BRIG_ROUND_FLOAT_DEFAULT, SL, MVT::i32), // round + Node->getOperand(1), // src0 + CurDAG->getConstantFP(BitsToFloat(0x3f800000), SL, MVT::f32), // src1 + CurDAG->getTargetConstant(BRIG_TYPE_F32, SL, MVT::i32) // TypeLength + }; + + return CurDAG->SelectNodeTo(Node, HSAIL::MUL_F32, MVT::f32, Ops); + } + default: + return SelectCode(Node); + } +} + +SDNode *HSAILDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *Node) { + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + if (HSAILIntrinsicInfo::isReadImage((HSAILIntrinsic::ID)IntNo) || + HSAILIntrinsicInfo::isLoadImage((HSAILIntrinsic::ID)IntNo)) + return SelectImageIntrinsic(Node); + + return SelectCode(Node); +} + +SDNode *HSAILDAGToDAGISel::SelectImageIntrinsic(SDNode *Node) { + SDValue Chain = Node->getOperand(0); + SDNode *ResNode; + + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + bool hasSampler = false; + + if (IntNo == HSAILIntrinsic::HSAIL_rd_imgf_1d_s32) { + SDLoc SL; + const SDValue Ops[] = { + CurDAG->getTargetConstant(1, SL, MVT::i1), // v4 + CurDAG->getTargetConstant(BRIG_TYPE_ROIMG, SL, MVT::i32), // imageType + CurDAG->getTargetConstant(BRIG_TYPE_S32, SL, MVT::i32), // coordType + CurDAG->getTargetConstant(BRIG_GEOMETRY_1D, SL, MVT::i32), // geometry + CurDAG->getTargetConstant(0, SL, MVT::i32), // equiv + Node->getOperand(2), // image + Node->getOperand(3), // sampler + Node->getOperand(4), // coordWidth + CurDAG->getTargetConstant(BRIG_TYPE_F32, SL, MVT::i32), // destType + Chain + }; + + return CurDAG->SelectNodeTo(Node, HSAIL::RDIMAGE, Node->getVTList(), Ops); + } + + if (HSAILIntrinsicInfo::isReadImage((HSAILIntrinsic::ID)IntNo)) { + hasSampler = true; + } else if (!HSAILIntrinsicInfo::isLoadImage((HSAILIntrinsic::ID)IntNo)) { + return SelectCode(Node); + } + + if (((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_f32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_s32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_ld_imgf_2ddepth_u32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_f32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_s32) || + ((HSAILIntrinsic::ID)IntNo) == + (HSAILIntrinsic::HSAIL_ld_imgf_2dadepth_u32)) { + assert(Node->getNumValues() == 2); + } else { + assert(Node->getNumValues() == 5); + } + SmallVector NewOps; + + unsigned OpIndex = 2; + + SDValue Img = Node->getOperand(OpIndex++); + int ResNo = Img.getResNo(); + SDValue ImgHandle = Img.getValue(ResNo); + NewOps.push_back(ImgHandle); + + if (hasSampler) { + SDValue Smp = Node->getOperand(OpIndex++); + SDValue SmpHandle = Smp.getValue(Smp.getResNo()); + NewOps.push_back(SmpHandle); + } + + while (OpIndex < Node->getNumOperands()) { + SDValue Coord = Node->getOperand(OpIndex++); + NewOps.push_back(Coord); + } + + NewOps.push_back(Chain); + + ResNode = CurDAG->SelectNodeTo(Node, getImageInstr((HSAILIntrinsic::ID)IntNo), + Node->getVTList(), NewOps); + return ResNode; +} + +SDNode *HSAILDAGToDAGISel::SelectActiveLaneMask(SDNode *Node) { + SDLoc SL(Node); + SDValue Ops[] = { + Node->getOperand(1), // width + Node->getOperand(2), // src0 + CurDAG->getTargetConstant(BRIG_TYPE_B64, SL, MVT::i32), // TypeLength + CurDAG->getTargetConstant(BRIG_TYPE_B1, SL, MVT::i32), // sourceType + Node->getOperand(0) // Chain + }; + + SelectGPROrImm(Ops[1], Ops[1]); + + return CurDAG->SelectNodeTo(Node, 
HSAIL::ACTIVELANEMASK_V4_B64_B1, + Node->getVTList(), Ops); +} + +SDNode *HSAILDAGToDAGISel::Select(SDNode *Node) { + assert(Node); + + EVT NVT = Node->getValueType(0); + unsigned Opcode = Node->getOpcode(); + SDNode *ResNode; + + DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n'); + + if (Node->isMachineOpcode()) { + DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); + return nullptr; // Already selected. + } + + switch (Opcode) { + default: + ResNode = SelectCode(Node); + break; + case ISD::SETCC: + ResNode = SelectSetCC(Node); + break; + case ISD::FrameIndex: { + if (FrameIndexSDNode *FIN = dyn_cast(Node)) { + SDLoc SL(Node); + SDValue Ops[] = { + CurDAG->getTargetConstant(HSAILAS::PRIVATE_ADDRESS, SL, MVT::i32), + CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32), + CurDAG->getRegister(0, NVT), + CurDAG->getTargetConstant(0, SL, NVT), + CurDAG->getTargetConstant(BRIG_TYPE_U32, SL, MVT::i32) + }; + + ResNode = CurDAG->SelectNodeTo(Node, HSAIL::LDA_U32, NVT, Ops); + } else { + ResNode = Node; + } + break; + } + case ISD::GlobalAddress: { + const GlobalAddressSDNode *GSDN = cast(Node); + const GlobalValue *GV = GSDN->getGlobal(); + EVT PtrVT = Node->getValueType(0); + unsigned AS = GSDN->getAddressSpace(); + SDLoc SL(Node); + + BrigType BT = (PtrVT == MVT::i32) ? BRIG_TYPE_U32 : BRIG_TYPE_U64; + unsigned Opcode = (PtrVT == MVT::i32) ? HSAIL::LDA_U32 : HSAIL::LDA_U64; + + const SDValue Ops[] = { + CurDAG->getTargetConstant(AS, SL, MVT::i32), + CurDAG->getTargetGlobalAddress(GV, SL, PtrVT, 0), + CurDAG->getRegister(HSAIL::NoRegister, NVT), + CurDAG->getTargetConstant(GSDN->getOffset(), SL, PtrVT), + CurDAG->getTargetConstant(BT, SL, MVT::i32) + }; + + ResNode = CurDAG->SelectNodeTo(Node, Opcode, PtrVT, Ops); + break; + } + case ISD::INTRINSIC_WO_CHAIN: + ResNode = SelectINTRINSIC_WO_CHAIN(Node); + break; + case ISD::INTRINSIC_W_CHAIN: + ResNode = SelectINTRINSIC_W_CHAIN(Node); + break; + + case ISD::CALLSEQ_START: { + // LLVM 3.6 unable to select start/end of call sequence chained with the + // rest of the arg scope operations due to the WalkChainUsers check which + // reports it may induce a cycle in the graph, so select it manually. 
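+ // (ARG_SCOPE_START / ARG_SCOPE_END, defined earlier in this patch, print
+ // the "{" and "}" that delimit an HSAIL argument scope around the call;
+ // roughly, a sketch of the emitted form is:
+ //   {
+ //     arg_u32 %ret;
+ //     call &callee (%ret) ();
+ //   }
+ // with the actual arg declarations depending on the callee's signature.)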
+ ResNode = CurDAG->SelectNodeTo(Node, HSAIL::ARG_SCOPE_START, MVT::Other, + MVT::Glue, + Node->getOperand(1), // src0 + Node->getOperand(0)); // Chain + break; + } + case ISD::CALLSEQ_END: { + const SDValue Ops[] = { + Node->getOperand(1), // src0 + Node->getOperand(2), // src1 + Node->getOperand(0), // Chain + Node->getOperand(3) // Glue + }; + + ResNode = CurDAG->SelectNodeTo(Node, HSAIL::ARG_SCOPE_END, MVT::Other, + MVT::Glue, Ops); + break; + } + case HSAILISD::ARG_LD: { + ResNode = SelectArgLd(cast(Node)); + break; + } + case HSAILISD::ARG_ST: { + ResNode = SelectArgSt(cast(Node)); + break; + } + case ISD::ADDRSPACECAST: { + ResNode = SelectAddrSpaceCast(cast(Node)); + break; + } + case HSAILISD::ACTIVELANEMASK: + return SelectActiveLaneMask(Node); + } + + return ResNode; +} + +bool HSAILDAGToDAGISel::IsOREquivalentToADD(SDValue Op) const { + assert(Op.getOpcode() == ISD::OR); + + SDValue N0 = Op->getOperand(0); + SDValue N1 = Op->getOperand(1); + EVT VT = N0.getValueType(); + + // Highly inspired by (a|b) case in DAGCombiner::visitADD + if (VT.isInteger() && !VT.isVector()) { + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + CurDAG->computeKnownBits(N0, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + CurDAG->computeKnownBits(N1, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return yes. + // If all possibly-set bits on the RHS are clear on the LHS, return yes. + if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) + return true; + } + } + + // Fallback to the more conservative check + return CurDAG->isBaseWithConstantOffset(Op); +} + +/// \brief Return true if the pointer is 32-bit in large and small models +static bool addrSpaceHasPtr32(unsigned AS) { + switch (AS) { + default: + return false; + + case HSAILAS::GROUP_ADDRESS: + case HSAILAS::ARG_ADDRESS: + case HSAILAS::PRIVATE_ADDRESS: + case HSAILAS::SPILL_ADDRESS: + return true; + } +} + +/// We accept an SDNode to keep things simple in the TD files. The +/// cast to MemSDNode will never assert because this predicate is only +/// used in a pattern fragment that matches load or store nodes. 
+bool HSAILDAGToDAGISel::MemOpHasPtr32(SDNode *N) const { + return addrSpaceHasPtr32(cast(N)->getAddressSpace()); +} + +bool HSAILDAGToDAGISel::SelectAddrCommon(SDValue Addr, SDValue &Base, + SDValue &Reg, int64_t &Offset, + MVT ValueType, int Depth) const { + if (Depth > 5) + return false; + + SDValue backup_base = Base, backup_reg = Reg; + int64_t backup_offset = Offset; + + switch (Addr.getOpcode()) { + case ISD::Constant: { + int64_t new_offset = cast(Addr)->getSExtValue(); + // No 64 bit offsets in 32 bit target + if (Subtarget->isSmallModel() && !isInt<32>(new_offset)) + return false; + Offset += new_offset; + return true; + } + case ISD::FrameIndex: { + if (Base.getNode() == 0) { + Base = CurDAG->getTargetFrameIndex( + cast(Addr)->getIndex(), ValueType); + return true; + } + break; + } + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::TargetGlobalTLSAddress: { + if (Base.getNode() == 0) { + Base = CurDAG->getTargetGlobalAddress( + cast(Addr)->getGlobal(), SDLoc(Addr), ValueType); + int64_t new_offset = + Offset + cast(Addr)->getOffset(); + if (Subtarget->isSmallModel() && !isInt<32>(new_offset)) + return false; + Offset += new_offset; + return true; + } + break; + } + case ISD::TargetExternalSymbol: { + if (Base.getNode() == 0) { + Base = Addr; + return true; + } + break; + } + case ISD::OR: // Treat OR as ADD when Op1 & Op2 == 0 + if (IsOREquivalentToADD(Addr)) { + bool can_selec_first_op = SelectAddrCommon(Addr.getOperand(0), Base, Reg, + Offset, ValueType, Depth + 1); + + if (can_selec_first_op && SelectAddrCommon(Addr.getOperand(1), Base, Reg, + Offset, ValueType, Depth + 1)) + return true; + Base = backup_base; + Reg = backup_reg; + Offset = backup_offset; + } + break; + case ISD::ADD: { + bool can_selec_first_op = SelectAddrCommon(Addr.getOperand(0), Base, Reg, + Offset, ValueType, Depth + 1); + + if (can_selec_first_op && SelectAddrCommon(Addr.getOperand(1), Base, Reg, + Offset, ValueType, Depth + 1)) + return true; + Base = backup_base; + Reg = backup_reg; + Offset = backup_offset; + break; + } + case HSAILISD::LDA: { + if (SelectAddrCommon(Addr.getOperand(1), Base, Reg, Offset, ValueType, + Depth + 1)) { + return true; + } + Base = backup_base; + Reg = backup_reg; + Offset = backup_offset; + break; + } + default: + break; + } + + // By default generate address as register + if (Reg.getNode() == 0) { + Reg = Addr; + return true; + } + return false; +} + +/// SelectAddr - returns true if it is able pattern match an addressing mode. +/// It returns the operands which make up the maximal addressing mode it can +/// match by reference. +/// +/// Parent is the parent node of the addr operand that is being matched. It +/// is always a load, store, atomic node, or null. It is only null when +/// checking memory operands for inline asm nodes. 
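+///
+/// A small example of the decomposition done by SelectAddrCommon: for an
+/// address of the form (add (add %r, frameindex), 16) the result is
+/// Base = the target frame index, Reg = %r and Offset = 16. If nothing can
+/// be folded, the whole address is returned in Reg with Offset = 0.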
+bool HSAILDAGToDAGISel::SelectAddr(SDValue Addr, SDValue &Base, SDValue &Reg, + SDValue &Offset) const { + MVT VT = Addr.getValueType().getSimpleVT(); + assert(VT == MVT::i32 || VT == MVT::i64); + + int64_t disp = 0; + if (!SelectAddrCommon(Addr, Base, Reg, disp, VT, 0)) + return false; + + SDLoc SL(Addr); + Offset = CurDAG->getTargetConstant(disp, SL, VT); + if (Base.getNode() == 0) + Base = CurDAG->getRegister(0, VT); + if (Reg.getNode() == 0) + Reg = CurDAG->getRegister(0, VT); + return true; +} + +bool HSAILDAGToDAGISel::SelectLoadAddr(SDNode *ParentLoad, SDValue Addr, + SDValue &Base, SDValue &Reg, + SDValue &Offset, SDValue &Segment, + SDValue &Align, SDValue &Type, + SDValue &Width, + SDValue &ModifierMask) const { + const LoadSDNode *Load = cast(ParentLoad); + assert(!Load->isIndexed()); + + if (!SelectAddr(Addr, Base, Reg, Offset)) + return false; + + unsigned AS = Load->getAddressSpace(); + + SDLoc SL(ParentLoad); + + MVT MemVT = Load->getMemoryVT().getSimpleVT(); + ISD::LoadExtType ExtTy = Load->getExtensionType(); + unsigned BrigType = getBrigType(MemVT.SimpleTy, ExtTy == ISD::SEXTLOAD); + + Segment = CurDAG->getTargetConstant(AS, SL, MVT::i32); + Align = CurDAG->getTargetConstant(Load->getAlignment(), SL, MVT::i32); + Type = CurDAG->getTargetConstant(BrigType, SL, MVT::i32); + Width = CurDAG->getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32); + + // TODO: Set if invariant. + ModifierMask = CurDAG->getTargetConstant(0, SL, MVT::i32); + return true; +} + +bool HSAILDAGToDAGISel::SelectStoreAddr(SDNode *ParentStore, SDValue Addr, + SDValue &Base, SDValue &Reg, + SDValue &Offset, SDValue &Segment, + SDValue &Align, + /*SDValue &Equiv,*/ + SDValue &Type) const { + const StoreSDNode *Store = cast(ParentStore); + assert(!Store->isIndexed()); + + if (!SelectAddr(Addr, Base, Reg, Offset)) + return false; + + unsigned AS = Store->getAddressSpace(); + + MVT MemVT = Store->getMemoryVT().getSimpleVT(); + unsigned BrigType = getBrigTypeFromStoreType(MemVT.SimpleTy); + + SDLoc SL(ParentStore); + + Segment = CurDAG->getTargetConstant(AS, SL, MVT::i32); + Align = CurDAG->getTargetConstant(Store->getAlignment(), SL, MVT::i32); + // Equiv = CurDAG->getTargetConstant(0, SL, MVT::i32); + Type = CurDAG->getTargetConstant(BrigType, SL, MVT::i32); + return true; +} + +static BrigMemoryOrder getBrigMemoryOrder(AtomicOrdering Order) { + switch (Order) { + case Monotonic: + return BRIG_MEMORY_ORDER_RELAXED; + case Acquire: + return BRIG_MEMORY_ORDER_SC_ACQUIRE; + case Release: + return BRIG_MEMORY_ORDER_SC_RELEASE; + case AcquireRelease: + case SequentiallyConsistent: + return BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE; + default: + llvm_unreachable("unhandled memory order"); + } +} + +static BrigMemoryScope getBrigMemoryScope(SynchronizationScope Scope, + unsigned AS) { + switch (Scope) { + case SingleThread: + return BRIG_MEMORY_SCOPE_WORKITEM; + case CrossThread: + // FIXME: This needs to be fixed when LLVM support other scope values. + switch (AS) { + case HSAILAS::GROUP_ADDRESS: + return BRIG_MEMORY_SCOPE_WORKGROUP; + case HSAILAS::REGION_ADDRESS: + return BRIG_MEMORY_SCOPE_AGENT; + default: + return BRIG_MEMORY_SCOPE_SYSTEM; + } + } +} + +bool HSAILDAGToDAGISel::SelectAtomicAddr(SDNode *ParentAtomic, SDValue Addr, + SDValue &Segment, SDValue &Order, + SDValue &Scope, SDValue &Equiv, + + SDValue &Base, SDValue &Reg, + SDValue &Offset) const { + if (!SelectAddr(Addr, Base, Reg, Offset)) + return false; + + const AtomicSDNode *Atomic = cast(ParentAtomic); + + // XXX - What do we do with the failure ordering? 
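+  // For illustration of the mapping below: a seq_cst atomic on a group
+  // segment pointer gets order = BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE and
+  // scope = BRIG_MEMORY_SCOPE_WORKGROUP, while the same operation on a flat
+  // or global pointer falls back to BRIG_MEMORY_SCOPE_SYSTEM.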
+ AtomicOrdering SuccOrder = Atomic->getSuccessOrdering(); + SynchronizationScope SyncScope = Atomic->getSynchScope(); + + unsigned AS = Atomic->getAddressSpace(); + + SDLoc SL(ParentAtomic); + + Segment = CurDAG->getTargetConstant(AS, SL, MVT::i32); + Order = CurDAG->getTargetConstant(getBrigMemoryOrder(SuccOrder), SL, MVT::i32); + Scope = + CurDAG->getTargetConstant(getBrigMemoryScope(SyncScope, AS), SL, MVT::i32); + Equiv = CurDAG->getTargetConstant(0, SL, MVT::i32); + + return true; +} + +void HSAILDAGToDAGISel::SelectAddrSpaceCastCommon( + const AddrSpaceCastSDNode &ASC, SDValue &NoNull, SDValue &Ptr, + SDValue &DestType, SDValue &SrcType) const { + SDLoc SL(&ASC); + + SelectGPROrImm(ASC.getOperand(0), Ptr); + NoNull = CurDAG->getTargetConstant(0, SL, MVT::i1); + + BrigType DestBT = + getBrigType(ASC.getValueType(0).getSimpleVT().SimpleTy, false); + BrigType SrcBT = + getBrigType(Ptr.getValueType().getSimpleVT().SimpleTy, false); + + DestType = CurDAG->getTargetConstant(DestBT, SL, MVT::i32); + SrcType = CurDAG->getTargetConstant(SrcBT, SL, MVT::i32); +} + +SDNode *HSAILDAGToDAGISel::SelectAddrSpaceCast(AddrSpaceCastSDNode *ASC) const { + SDValue Cast(ASC, 0); + unsigned DstAS = ASC->getDestAddressSpace(); + unsigned SrcAS = ASC->getSrcAddressSpace(); + + EVT DestVT = ASC->getValueType(0); + EVT SrcVT = ASC->getOperand(0).getValueType(); + bool Src32 = (SrcVT == MVT::i32); + bool Dst32 = (DestVT == MVT::i32); + + unsigned Opcode; + SDValue Segment, NoNull, Ptr, DestType, SrcType; + + SDLoc SL(ASC); + + if (SrcAS == HSAILAS::FLAT_ADDRESS) { + if (Src32 && Dst32) + Opcode = HSAIL::FTOS_U32_U32; + else if (Src32 && !Dst32) + llvm_unreachable("Pointer size combination should not happen"); + else if (!Src32 && Dst32) + Opcode = HSAIL::FTOS_U32_U64; + else + Opcode = HSAIL::FTOS_U64_U64; + + Segment = CurDAG->getTargetConstant(DstAS, SL, MVT::i32); + SelectAddrSpaceCastCommon(*ASC, NoNull, Ptr, DestType, SrcType); + } else if (DstAS == HSAILAS::FLAT_ADDRESS) { + if (Src32 && Dst32) + Opcode = HSAIL::STOF_U32_U32; + else if (Src32 && !Dst32) + Opcode = HSAIL::STOF_U64_U32; + else if (!Src32 && Dst32) + llvm_unreachable("Pointer size combination should not happen"); + else + Opcode = HSAIL::STOF_U64_U64; + + Segment = CurDAG->getTargetConstant(SrcAS, SL, MVT::i32); + SelectAddrSpaceCastCommon(*ASC, NoNull, Ptr, DestType, SrcType); + } else + return nullptr; + + const SDValue Ops[] = {Segment, NoNull, Ptr, DestType, SrcType}; + + return CurDAG->SelectNodeTo(ASC, Opcode, DestVT, Ops); +} + +static BrigCompareOperation getBrigIntCompare(ISD::CondCode CC, bool &Signed) { + switch (CC) { + case ISD::SETUEQ: + Signed = true; // Sign is irrelevant, use to be consistent. + return BRIG_COMPARE_EQ; + case ISD::SETUGT: + return BRIG_COMPARE_GT; + case ISD::SETUGE: + return BRIG_COMPARE_GE; + case ISD::SETULT: + return BRIG_COMPARE_LT; + case ISD::SETULE: + return BRIG_COMPARE_LE; + case ISD::SETUNE: + Signed = true; // Sign is irrelevant, use to be consistent. 
+ return BRIG_COMPARE_NE; + case ISD::SETEQ: + Signed = true; + return BRIG_COMPARE_EQ; + case ISD::SETGT: + Signed = true; + return BRIG_COMPARE_GT; + case ISD::SETGE: + Signed = true; + return BRIG_COMPARE_GE; + case ISD::SETLT: + Signed = true; + return BRIG_COMPARE_LT; + case ISD::SETLE: + Signed = true; + return BRIG_COMPARE_LE; + case ISD::SETNE: + Signed = true; + return BRIG_COMPARE_NE; + default: + llvm_unreachable("unhandled cond code"); + } +} + +static BrigCompareOperation getBrigFPCompare(ISD::CondCode CC) { + switch (CC) { + case ISD::SETOEQ: + case ISD::SETEQ: + return BRIG_COMPARE_EQ; + case ISD::SETOGT: + case ISD::SETGT: + return BRIG_COMPARE_GT; + case ISD::SETOGE: + case ISD::SETGE: + return BRIG_COMPARE_GE; + case ISD::SETOLT: + case ISD::SETLT: + return BRIG_COMPARE_LT; + case ISD::SETOLE: + case ISD::SETLE: + return BRIG_COMPARE_LE; + case ISD::SETONE: + case ISD::SETNE: + return BRIG_COMPARE_NE; + case ISD::SETO: + return BRIG_COMPARE_NUM; + case ISD::SETUO: + return BRIG_COMPARE_NAN; + case ISD::SETUEQ: + return BRIG_COMPARE_EQU; + case ISD::SETUGT: + return BRIG_COMPARE_GTU; + case ISD::SETUGE: + return BRIG_COMPARE_GEU; + case ISD::SETULT: + return BRIG_COMPARE_LTU; + case ISD::SETULE: + return BRIG_COMPARE_LEU; + case ISD::SETUNE: + return BRIG_COMPARE_NEU; + default: + llvm_unreachable("unhandled cond code"); + } +} + +static unsigned getCmpOpcode(BrigType SrcBT) { + switch (SrcBT) { + case BRIG_TYPE_B1: + return HSAIL::CMP_B1_B1; + case BRIG_TYPE_S32: + return HSAIL::CMP_B1_S32; + case BRIG_TYPE_U32: + return HSAIL::CMP_B1_U32; + case BRIG_TYPE_S64: + return HSAIL::CMP_B1_S64; + case BRIG_TYPE_U64: + return HSAIL::CMP_B1_U64; + case BRIG_TYPE_F32: + return HSAIL::CMP_B1_F32; + case BRIG_TYPE_F64: + return HSAIL::CMP_B1_F64; + default: + llvm_unreachable("Compare of type not supported"); + } +} + +SDNode *HSAILDAGToDAGISel::SelectSetCC(SDNode *SetCC) const { + SDValue LHS, RHS; + + if (!SelectGPROrImm(SetCC->getOperand(0), LHS)) + return nullptr; + + if (!SelectGPROrImm(SetCC->getOperand(1), RHS)) + return nullptr; + + MVT VT = LHS.getValueType().getSimpleVT(); + ISD::CondCode CC = cast(SetCC->getOperand(2))->get(); + + bool Signed = false; + BrigCompareOperation BrigCmp; + + if (VT.isFloatingPoint()) + BrigCmp = getBrigFPCompare(CC); + else + BrigCmp = getBrigIntCompare(CC, Signed); + + SDLoc SL(SetCC); + + SDValue CmpOp = CurDAG->getTargetConstant(BrigCmp, SL, MVT::i32); + SDValue FTZ = CurDAG->getTargetConstant(VT == MVT::f32, SL, MVT::i1); + + // TODO: Should be able to fold conversions into this instead. 
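+  // The result is always a b1 compare of the source type, e.g. a signed i32
+  // setlt selects to CMP_B1_S32 with the LT compare operation, and only f32
+  // compares set the ftz modifier.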
+ SDValue DestType = CurDAG->getTargetConstant(BRIG_TYPE_B1, SL, MVT::i32); + + BrigType SrcBT = getBrigType(VT.SimpleTy, Signed); + SDValue SrcType = CurDAG->getTargetConstant(SrcBT, SL, MVT::i32); + + const SDValue Ops[] = {CmpOp, FTZ, LHS, RHS, DestType, SrcType}; + + return CurDAG->SelectNodeTo(SetCC, getCmpOpcode(SrcBT), MVT::i1, Ops); +} + +static unsigned getLoadBrigOpcode(BrigType BT) { + switch (BT) { + case BRIG_TYPE_U32: + return HSAIL::LD_U32; + case BRIG_TYPE_S32: + return HSAIL::LD_S32; + case BRIG_TYPE_F32: + return HSAIL::LD_F32; + case BRIG_TYPE_U64: + return HSAIL::LD_U64; + case BRIG_TYPE_S64: + return HSAIL::LD_S64; + case BRIG_TYPE_F64: + return HSAIL::LD_F64; + case BRIG_TYPE_U8: + return HSAIL::LD_U8; + case BRIG_TYPE_S8: + return HSAIL::LD_S8; + case BRIG_TYPE_U16: + return HSAIL::LD_U16; + case BRIG_TYPE_S16: + return HSAIL::LD_S16; + default: + llvm_unreachable("Unhandled load type"); + } +} + +static unsigned getRArgLoadBrigOpcode(BrigType BT) { + switch (BT) { + case BRIG_TYPE_U32: + return HSAIL::RARG_LD_U32; + case BRIG_TYPE_S32: + return HSAIL::RARG_LD_S32; + case BRIG_TYPE_F32: + return HSAIL::RARG_LD_F32; + case BRIG_TYPE_U64: + return HSAIL::RARG_LD_U64; + case BRIG_TYPE_S64: + return HSAIL::RARG_LD_S64; + case BRIG_TYPE_F64: + return HSAIL::RARG_LD_F64; + case BRIG_TYPE_U8: + return HSAIL::RARG_LD_U8; + case BRIG_TYPE_S8: + return HSAIL::RARG_LD_S8; + case BRIG_TYPE_U16: + return HSAIL::RARG_LD_U16; + case BRIG_TYPE_S16: + return HSAIL::RARG_LD_S16; + default: + llvm_unreachable("Unhandled load type"); + } +} + +static unsigned getStoreBrigOpcode(BrigType BT) { + switch (BT) { + case BRIG_TYPE_U32: + return HSAIL::ST_U32; + case BRIG_TYPE_F32: + return HSAIL::ST_F32; + case BRIG_TYPE_U64: + return HSAIL::ST_U64; + case BRIG_TYPE_F64: + return HSAIL::ST_F64; + case BRIG_TYPE_U8: + return HSAIL::ST_U8; + case BRIG_TYPE_U16: + return HSAIL::ST_U16; + default: + llvm_unreachable("Unhandled load type"); + } +} + +SDNode *HSAILDAGToDAGISel::SelectArgLd(MemSDNode *Node) const { + bool IsRetLd = Node->getConstantOperandVal(3); + bool IsSext = Node->getConstantOperandVal(4); + + SDValue Base, Reg, Offset; + if (!SelectAddr(Node->getOperand(1), Base, Reg, Offset)) + return nullptr; + + MVT MemVT = Node->getMemoryVT().getSimpleVT(); + BrigType BT = getBrigType(MemVT.SimpleTy, IsSext); + + SDLoc SL(Node); + + SDValue Ops[10] = { + Base, Reg, Offset, + CurDAG->getTargetConstant(BT, SL, MVT::i32), // TypeLength + CurDAG->getTargetConstant(Node->getAddressSpace(), SL, MVT::i32), // segment + CurDAG->getTargetConstant(Node->getAlignment(), SL, MVT::i32), // align + Node->getOperand(2), // width + CurDAG->getTargetConstant(0, SL, MVT::i1), // mask + Node->getOperand(0), // Chain + SDValue() + }; + + ArrayRef OpsArr = makeArrayRef(Ops); + + if (Node->getNumOperands() == 6) + Ops[9] = Node->getOperand(5); + else + OpsArr = OpsArr.drop_back(1); + + unsigned Opcode = IsRetLd ? 
getRArgLoadBrigOpcode(BT) : getLoadBrigOpcode(BT); + + return CurDAG->SelectNodeTo(Node, Opcode, Node->getVTList(), OpsArr); +} + +SDNode *HSAILDAGToDAGISel::SelectArgSt(MemSDNode *Node) const { + SDValue Base, Reg, Offset; + if (!SelectAddr(Node->getOperand(2), Base, Reg, Offset)) + return nullptr; + + SDLoc SL(Node); + + MVT MemVT = Node->getMemoryVT().getSimpleVT(); + BrigType BT = getBrigType(MemVT.SimpleTy, false); + + SDValue Ops[9] = { + Node->getOperand(1), + Base, Reg, Offset, + CurDAG->getTargetConstant(BT, SL, MVT::i32), // TypeLength + CurDAG->getTargetConstant(Node->getAddressSpace(), SL, MVT::i32), // segment + CurDAG->getTargetConstant(Node->getAlignment(), SL, MVT::i32), // align + Node->getOperand(0), // Chain + SDValue() + }; + + ArrayRef OpsArr = makeArrayRef(Ops); + + if (Node->getNumOperands() == 4) + Ops[8] = Node->getOperand(3); + else + OpsArr = OpsArr.drop_back(1); + + return CurDAG->SelectNodeTo(Node, getStoreBrigOpcode(BT), Node->getVTList(), + OpsArr); +} + +FunctionPass *llvm::createHSAILISelDag(TargetMachine &TM) { + return new HSAILDAGToDAGISel(TM); +} Index: lib/Target/HSAIL/HSAILISelLowering.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILISelLowering.h @@ -0,0 +1,230 @@ +//===---- HSAILISelLowering.h - HSAIL DAG Lowering Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes how to lower LLVM code to machine code. This has two +// main components: +// +// 1. Which ValueTypes are natively supported by the target. +// 2. Which operations are supported for supported ValueTypes. +// 3. Cost thresholds for alternative implementations of certain operations. +// +// In addition it has a few other components, like information about FP +// immediates. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILISELLOWERING_H +#define LLVM_LIB_TARGET_HSAIL_HSAILISELLOWERING_H + +#include "HSAILRegisterInfo.h" +#include "HSAILMachineFunctionInfo.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class HSAILSubtarget; + +class HSAILTargetLowering : public TargetLowering { +private: + const HSAILSubtarget *Subtarget; + const HSAILRegisterInfo *RegInfo; + +public: + explicit HSAILTargetLowering(HSAILTargetMachine &TM, + const HSAILSubtarget &ST); + virtual ~HSAILTargetLowering(); + + EVT getSetCCResultType(const DataLayout &DL, + LLVMContext &Context, EVT VT) const override; + + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; + + uint8_t getRepRegClassCostFor(MVT VT) const override; + + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + + bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace = 0, + unsigned Align = 1, + bool * /*Fast*/ = nullptr) const override; + + unsigned getJumpTableEncoding() const override; + + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &DAG, + unsigned Depth = 0) const override; + + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + + bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; + + bool isLoadBitCastBeneficial(EVT load, EVT bitcast) const override; + + virtual bool + isVectorToScalarLoadStoreWidenBeneficial(unsigned Width, EVT WidenVT, + const MemSDNode *N) const; + +protected: + /// Recursively lower a single argument. + /// Either Ins or Outs must non-zero, which means we are doing argument load + /// or store. + /// ArgNo is an index to InVals and OutVals, which is advanced after the call. + /// AS is an address space of argument, either arg or kernarg + /// ParamPtr is a pointer value for argument to load from or store to. + /// Offset is a value which has to be added to the pointer. + /// If InFlag is present glue all operations. + /// If ChainLink is true chain link all operations. + /// Returns last operation value. + SDValue LowerArgument(SDValue Chain, SDValue InFlag, bool ChainLink, + const SmallVectorImpl *Ins, + const SmallVectorImpl *Outs, SDLoc dl, + SelectionDAG &DAG, SmallVectorImpl *InVals, + unsigned &ArgNo, Type *type, unsigned AS, + const char *ParamName, SDValue ParamPtr, + const SmallVectorImpl *OutVals = nullptr, + bool isRetArgLoad = false, + const AAMDNodes & = AAMDNodes(), + uint64_t offset = 0) const; + +public: + SDValue getArgLoad(SelectionDAG &DAG, SDLoc SL, EVT ArgVT, Type *Ty, + bool isSExt, unsigned AddressSpace, SDValue Chain, + SDValue Ptr, SDValue InFlag, unsigned index, + bool IsRetArgLoad = false, uint64_t Offset = 0) const; + + SDValue getArgStore(SelectionDAG &DAG, SDLoc SL, EVT ArgVT, Type *Ty, + unsigned AddressSpace, SDValue Chain, SDValue Ptr, + SDValue Value, unsigned Index, SDValue InFlag, + const AAMDNodes &AAInfo = AAMDNodes(), + uint64_t Offset = 0) const; + + //===--------------------------------------------------------------------===// + // Lowering methods - These methods must be implemented by targets so that + // the SelectionDAGLowering code knows how to lower these. 
+ + SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, + const SmallVectorImpl &ArgInfo, + SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, + MachineFrameInfo *MFI, unsigned i) const; + + SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Ins, + SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, SDLoc dl, + SelectionDAG &DAG) const override; + + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + + void ReplaceNodeResults(SDNode *N, SmallVectorImpl &Results, + SelectionDAG &DAG) const override; + + const char *getTargetNodeName(unsigned Opcode) const override; + + /// Custom lowering methods + SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerLdKernargIntrinsic(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; + + SDValue lowerSamplerInitializerOperand(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerROTR(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + + //===--------------------------------------------------------------------===// + // Instruction Emitting Hooks + // + bool isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AddrSpace) const override; + + bool isZExtFree(Type *Ty1, Type *Ty2) const override; + + bool isZExtFree(EVT VT1, EVT VT2) const override; + + bool isFAbsFree(EVT VT) const override; + bool isFNegFree(EVT VT) const override; + + bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; + + bool isLegalICmpImmediate(int64_t Imm) const override; + + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT LHSTy) const override; + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; + + void AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const override; +}; + +namespace HSAILISD { +// HSAIL Specific DAG Nodes +enum { + // Start the numbering where the builtin ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + CALL, // Function call based on a single integer + RET, + SMAX, + UMAX, + SMIN, + UMIN, + FRACT, + NFMA, + UMAD, + SMAD, + UMUL24, + SMUL24, + UMAD24, + SMAD24, + BITSELECT, + SBITEXTRACT, + UBITEXTRACT, + FLDEXP, + CLASS, + LDA, + ACTIVELANEPERMUTE, + ACTIVELANEID, + ACTIVELANECOUNT, + ACTIVELANEMASK, + KERNARGBASEPTR, + SEGMENTP, + + FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, + + // Load and store of arguments. Main purpose is to add glue to what would + // be a generic load / store. 
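+  // For example, the ARG_ST nodes emitted for a call's outgoing parameters
+  // are glued to the surrounding arg scope so that unrelated memory
+  // operations cannot be scheduled into it.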
+ ARG_LD, + ARG_ST +}; +} +} // end llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILISelLowering.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILISelLowering.cpp @@ -0,0 +1,1996 @@ +//===-- HSAILISelLowering.cpp - HSAIL DAG Lowering Implementation ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that HSAIL uses to lower LLVM code into a +// selection DAG. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsail-isel" +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILInstrInfo.h" +#include "HSAILISelLowering.h" +#include "HSAILMachineFunctionInfo.h" +#include "HSAILSubtarget.h" +#include "HSAILTargetMachine.h" +#include "HSAILUtilityFunctions.h" +#include "HSAILOpaqueTypes.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/raw_ostream.h" + +#include "HSAILGenInstrInfo.inc" + +using namespace llvm; +using namespace dwarf; + +HSAILTargetLowering::HSAILTargetLowering(HSAILTargetMachine &TM, + const HSAILSubtarget &ST) + : TargetLowering(TM), + Subtarget(&ST) { + // HSAIL uses a -1 to store a Boolean value as an int. For example, + // see the return values of the cmp instructions. This also requires + // that we never use a cvt instruction for converting a Boolean to a + // larger integer, because HSAIL cvt uses a zext when the source is + // b1. Due to the setting below, LLVM will ensure that all such + // conversions are done with the sext instruction. + setBooleanContents(ZeroOrNegativeOneBooleanContent); + + RegInfo = Subtarget->getRegisterInfo(); + + // Set up the register classes. 
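+  // HSAIL registers are typeless bit containers, so integer and floating
+  // point values of the same width share a class: i32/f32 live in GPR32,
+  // i64/f64 in GPR64, and i1 values in the control register class.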
+ addRegisterClass(MVT::i32, &HSAIL::GPR32RegClass); + addRegisterClass(MVT::i64, &HSAIL::GPR64RegClass); + addRegisterClass(MVT::f32, &HSAIL::GPR32RegClass); + addRegisterClass(MVT::f64, &HSAIL::GPR64RegClass); + addRegisterClass(MVT::i1, &HSAIL::CRRegClass); + + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); + + setOperationAction(ISD::BSWAP, MVT::i32, Custom); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + setOperationAction(ISD::ADD, MVT::i1, Custom); + setOperationAction(ISD::ROTL, MVT::i1, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Custom); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i1, Expand); + setOperationAction(ISD::ROTR, MVT::i32, Custom); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand); + + setOperationAction(ISD::BR_CC, MVT::i32, Expand); + setOperationAction(ISD::BR_CC, MVT::i64, Expand); + setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BR_CC, MVT::f64, Expand); + setOperationAction(ISD::BR_CC, MVT::i1, Expand); + + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); + + setOperationAction(ISD::SELECT, MVT::f64, Promote); + AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64); + + setOperationAction(ISD::SELECT, MVT::f32, Promote); + AddPromotedToType(ISD::SELECT, MVT::f32, MVT::i32); + + setOperationAction(ISD::GlobalAddress, MVT::i32, Legal); + setOperationAction(ISD::GlobalAddress, MVT::i64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::Constant, MVT::i32, Legal); + setOperationAction(ISD::Constant, MVT::i64, Legal); + + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); + + for (MVT VT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + + for (MVT VT : MVT::fp_vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Expand); + } + + for 
(MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom); + } + + for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v1i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v16i32, Expand); + + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v1i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v16i16, Expand); + + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v16i8, Expand); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v1i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v8i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v16i16, Expand); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v8i8, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v16i8, Expand); + + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v1i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v4i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v8i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v16i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v1i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v1i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i16, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i16, Expand); + + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v16i8, Expand); + } + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand); + setTruncStoreAction(MVT::i64, MVT::i32, Expand); + setTruncStoreAction(MVT::i64, MVT::v1i32, Expand); + 
setTruncStoreAction(MVT::i64, MVT::v2i32, Expand); + setTruncStoreAction(MVT::i64, MVT::v4i32, Expand); + setTruncStoreAction(MVT::i64, MVT::v8i32, Expand); + setTruncStoreAction(MVT::i64, MVT::v16i32, Expand); + setTruncStoreAction(MVT::i64, MVT::i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v1i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v2i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v4i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v8i16, Expand); + setTruncStoreAction(MVT::i64, MVT::v16i16, Expand); + setTruncStoreAction(MVT::i64, MVT::i1, Expand); + setTruncStoreAction(MVT::i64, MVT::i8, Expand); + setTruncStoreAction(MVT::i64, MVT::v2i8, Expand); + setTruncStoreAction(MVT::i64, MVT::v4i8, Expand); + setTruncStoreAction(MVT::i64, MVT::v8i8, Expand); + setTruncStoreAction(MVT::i64, MVT::v16i8, Expand); + + setOperationAction(ISD::STORE, MVT::i1, Custom); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + + setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom); + + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + + setHasMultipleConditionRegisters(true); + setJumpIsExpensive(true); + setSelectIsExpensive(true); + setPow2SDivIsCheap(false); + setPrefLoopAlignment(0); + setSchedulingPreference((CodeGenOpt::None == TM.getOptLevel()) ? Sched::Source + : Sched::ILP); +#ifdef _DEBUG + const char *pScheduler = std::getenv("AMD_DEBUG_HSAIL_PRE_RA_SCHEDULER"); + if (pScheduler) { + if (strcmp(pScheduler, "ilp") == 0) { + printf("Overriding pre-RA scheduler with %s\n", pScheduler); + setSchedulingPreference(Sched::ILP); + } else if (strcmp(pScheduler, "regpressure") == 0) { + printf("Overriding pre-RA scheduler with %s\n", pScheduler); + setSchedulingPreference(Sched::RegPressure); + } else if (strcmp(pScheduler, "hybrid") == 0) { + printf("Overriding pre-RA scheduler with %s\n", pScheduler); + setSchedulingPreference(Sched::Hybrid); + } + } +#endif + + computeRegisterProperties(ST.getRegisterInfo()); + + MaxStoresPerMemcpy = 4096; + MaxStoresPerMemmove = 4096; + MaxStoresPerMemset = 4096; +} + +HSAILTargetLowering::~HSAILTargetLowering() {} + +EVT HSAILTargetLowering::getSetCCResultType(const DataLayout &DL, + LLVMContext &Context, + EVT VT) const { + return MVT::i1; +} + +const TargetRegisterClass * +HSAILTargetLowering::getRepRegClassFor(MVT VT) const { + switch (VT.SimpleTy) { + case MVT::i64: + case MVT::f64: + return &HSAIL::GPR64RegClass; + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::f32: + return &HSAIL::GPR32RegClass; + case MVT::i1: + return &HSAIL::CRRegClass; + default: + llvm_unreachable("Cannot find register class for value type"); + break; + } + return nullptr; +} + +uint8_t HSAILTargetLowering::getRepRegClassCostFor(MVT VT) const { + return 1; +} + +bool HSAILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { + // All floating point types are legal for 32bit and 64bit types. 
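+  // Returning true here means fp constants are emitted directly as HSAIL
+  // immediates instead of being lowered to constant pool loads.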
+ return (VT == EVT(MVT::f32) || VT == EVT(MVT::f64)); +} + +bool HSAILTargetLowering::allowsMisalignedMemoryAccesses(EVT, + unsigned AddrSpace, + unsigned Align, + bool *Fast) const { + return true; +} + +unsigned HSAILTargetLowering::getJumpTableEncoding() const { + return MachineJumpTableInfo::EK_BlockAddress; +} + +bool HSAILTargetLowering::isOffsetFoldingLegal( + const GlobalAddressSDNode *GA) const { + return true; +} + +unsigned HSAILTargetLowering::ComputeNumSignBitsForTargetNode( + SDValue Op, const SelectionDAG &DAG, unsigned Depth) const { + return 1; +} + +static SDValue PerformBitalignCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + unsigned IID) { + assert(IID == HSAILIntrinsic::HSAIL_bitalign_b32 || + IID == HSAILIntrinsic::HSAIL_bytealign_b32); + SDValue Opr0 = N->getOperand(1); + SDValue Opr1 = N->getOperand(2); + SDValue Opr2 = N->getOperand(3); + ConstantSDNode *SHR = dyn_cast(Opr2); + SelectionDAG &DAG = DCI.DAG; + SDLoc dl = SDLoc(N); + EVT VT = N->getValueType(0); + // fold bitalign_b32(x & c1, x & c1, c2) -> bitalign_b32(x, x, c2) & rotr(c1, + // c2) + if (SHR && (Opr0 == Opr1) && (Opr0.getOpcode() == ISD::AND)) { + if (ConstantSDNode *AndMask = + dyn_cast(Opr0.getOperand(1))) { + uint64_t and_mask = AndMask->getZExtValue(); + uint64_t shr_val = SHR->getZExtValue() & 31U; + if (IID == HSAILIntrinsic::HSAIL_bytealign_b32) + shr_val = (shr_val & 3U) << 3U; + and_mask = + ((and_mask >> shr_val) | (and_mask << (32U - shr_val))) & 0xffffffffu; + Opr0 = Opr0->getOperand(0); + return DAG.getNode(ISD::AND, SDLoc(Opr1), Opr1.getValueType(), + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(IID, dl, MVT::i32), + Opr0, Opr0, Opr2), + DAG.getConstant(and_mask, dl, MVT::i32)); + } + } + // fold bitalign_b32(x, y, c) -> bytealign_b32(x, y, c/8) if c & 7 == 0 + if (SHR && (IID == HSAILIntrinsic::HSAIL_bitalign_b32)) { + uint64_t shr_val = SHR->getZExtValue() & 31U; + if ((shr_val & 7U) == 0) + return DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(HSAILIntrinsic::HSAIL_bytealign_b32, dl, MVT::i32), + Opr0, Opr1, DAG.getConstant(shr_val >> 3U, dl, MVT::i32)); + } + return SDValue(); +} + +static SDValue +PerformIntrinsic_Wo_ChainCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN); + unsigned IID = cast(N->getOperand(0))->getZExtValue(); + switch (IID) { + case HSAILIntrinsic::HSAIL_bitalign_b32: // fall-through + case HSAILIntrinsic::HSAIL_bytealign_b32: + return PerformBitalignCombine(N, DCI, IID); + } + return SDValue(); +} + +SDValue HSAILTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + switch (N->getOpcode()) { + case ISD::INTRINSIC_WO_CHAIN: + return PerformIntrinsic_Wo_ChainCombine(N, DCI); + default: + break; + } + + return SDValue(); +} + +bool HSAILTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, + EVT VT) const { + return (Opc == ISD::LOAD || Opc == ISD::STORE) && + (VT.getSimpleVT() == MVT::f32 || VT.getSimpleVT() == MVT::f64); +} + +//===--------------------------------------------------------------------===// + +/// n-th element of a vector has different alignment than a base. +/// This function returns alignment for n-th alement. + +// FIXME: It is probably not correct to use this. 
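+// For a <4 x i32> with ABI alignment 16 (the usual case) this yields 16 for
+// element 0, 4 for element 1, 8 for element 2 and 4 for element 3, i.e. the
+// natural alignment of each element's byte offset within the vector.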
+static unsigned getElementAlignment(const DataLayout &DL, Type *Ty, + unsigned n) { + if (Ty->isArrayTy()) // FIXME + return getElementAlignment(DL, Ty->getArrayElementType(), 0); + + unsigned Alignment = DL.getABITypeAlignment(Ty); + if (n && (Alignment > 1)) { + Type *EltTy = Ty->getScalarType(); + unsigned ffs = 0; + while (((n >> ffs) & 1) == 0) + ffs++; + Alignment = (DL.getABITypeAlignment(EltTy) * (1 << ffs)) & (Alignment - 1); + } else { + if (OpaqueType OT = GetOpaqueType(Ty)) { + if (IsImage(OT) || OT == Sampler) + Alignment = 8; + } + } + return Alignment; +} + +SDValue +HSAILTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + SDLoc dl, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + HSAILMachineFunctionInfo *FuncInfo = MF.getInfo(); + HSAILParamManager &PM = FuncInfo->getParamManager(); + const Function *F = MF.getFunction(); + const FunctionType *funcType = F->getFunctionType(); + + SmallVector RetOps; + RetOps.push_back(Chain); + + const DataLayout &DL = F->getParent()->getDataLayout(); + + Type *type = funcType->getReturnType(); + if (!type->isVoidTy()) { + Mangler Mang; + + // FIXME: The ParamManager here is only used for making sure the built + // string's name survives until code emission. We can't rely on the name + // here being added because unreachable functions with return values may not + // have return instructions. + const char *SymName = PM.getParamName( + PM.addReturnParam(type, PM.mangleArg(&Mang, F->getName(), DL))); + + MVT ArgPtrVT = getPointerTy(DL, HSAILAS::ARG_ADDRESS); + SDValue RetVariable = DAG.getTargetExternalSymbol(SymName, ArgPtrVT); + + AAMDNodes MD; // FIXME: What is this for? + // Value *mdops[] = { const_cast(F) }; + // MDNode *MD = MDNode::get(F->getContext(), mdops); + + unsigned ArgNo = 0; + LowerArgument(Chain, SDValue(), false, nullptr, &Outs, dl, DAG, &RetOps, ArgNo, + type, HSAILAS::ARG_ADDRESS, nullptr, RetVariable, &OutVals, + false, MD); + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, RetOps); + } + + return DAG.getNode(HSAILISD::RET, dl, MVT::Other, Chain); +} + +/// Create kernel or function parameter scalar load and return its +/// value. AddressSpace used to determine if that is a kernel or function +/// argument. ArgVT specifies expected value type where 'Ty' refers to the real +/// argument type from function's signature. +/// +/// If the call sequence is not glued we may have unrelated to call instructions +/// scheduled into the argscope if intent was argscope use. This function +/// inserts a load or store argument instruction with glue. If InFlag contains +/// glue it is used for inbound glue. Glue is produced as a last result and can +/// be consumed at will of the caller. Offset operand is added to the offset +/// value calculated from index. 
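+///
+/// For example, loading element 2 of a <4 x i32> kernel argument produces an
+/// ARG_LD memory intrinsic node of MVT::i32 at byte offset 8 from the
+/// argument symbol, with width(all) and the element's ABI alignment;
+/// non-kernel arguments instead use width(1) and the per-element alignment
+/// computed by getElementAlignment.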
+SDValue HSAILTargetLowering::getArgLoad(SelectionDAG &DAG, SDLoc SL, EVT ArgVT, + Type *Ty, bool isSExt, + unsigned AddressSpace, SDValue Chain, + SDValue Ptr, SDValue InFlag, + unsigned Index, bool IsRetArgLoad, + uint64_t Offset) const { + const MachineFunction &MF = DAG.getMachineFunction(); + const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + Type *EltTy = Ty; + + if (Ty->isArrayTy()) + EltTy = Ty->getArrayElementType(); + EltTy = EltTy->getScalarType(); + + MVT PtrVT = getPointerTy(DL, AddressSpace); + PointerType *ArgPT = PointerType::get(EltTy, AddressSpace); + + // TODO_HSA: check if that works with packed structs, it can happen + // we would need to inhibit alignment calculation in that case. + Offset += DL.getTypeStoreSize(EltTy) * Index; + + EVT MemVT = ArgVT; + if (ArgVT == MVT::i1) + MemVT = MVT::i8; + + if (!Ptr && AddressSpace == HSAILAS::KERNARG_ADDRESS) { + // If the argument symbol is unknown, generate a kernargbaseptr instruction. + Ptr = DAG.getNode(HSAILISD::KERNARGBASEPTR, SL, PtrVT); + } + + unsigned Align = getElementAlignment(DL, Ty, Index); + unsigned Width = BRIG_WIDTH_1; + + // TODO_HSA: Due to problems with RT alignment of vectors we have to + // use element size instead of vector size for alignment. + // Fix when RT is fixed. + if (AddressSpace == HSAILAS::KERNARG_ADDRESS) { + Align = DL.getABITypeAlignment(EltTy); + Width = BRIG_WIDTH_ALL; + } + + SDValue PtrOffs = + DAG.getNode(ISD::ADD, SL, PtrVT, Ptr, DAG.getConstant(Offset, SL, PtrVT)); + + const SDValue Ops[] = { + Chain, + PtrOffs, + DAG.getTargetConstant(Width, SL, MVT::i32), + DAG.getTargetConstant(IsRetArgLoad, SL, MVT::i1), + DAG.getTargetConstant(isSExt, SL, MVT::i1), + InFlag + }; + + ArrayRef OpsArr = makeArrayRef(Ops); + if (!InFlag) + OpsArr = OpsArr.drop_back(1); + + EVT VT = (MemVT.getStoreSize() < 4) ? MVT::i32 : ArgVT; + SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue); + + MachinePointerInfo PtrInfo(UndefValue::get(ArgPT), Offset); + + SDValue Arg = DAG.getMemIntrinsicNode(HSAILISD::ARG_LD, SL, VTs, OpsArr, + MemVT, PtrInfo, Align, + false, // isVolatile + true, // ReadMem + false, // WriteMem + MemVT.getStoreSize()); // Size + + if (ArgVT == MVT::i1) { + const SDValue Ops[] = {DAG.getNode(ISD::TRUNCATE, SL, MVT::i1, Arg), + Arg.getValue(1), Arg.getValue(2)}; + + return DAG.getMergeValues(Ops, SL); + } + + return Arg; +} + +SDValue HSAILTargetLowering::getArgStore( + SelectionDAG &DAG, SDLoc SL, EVT ArgVT, Type *Ty, unsigned AddressSpace, + SDValue Chain, SDValue Ptr, SDValue Value, unsigned Index, SDValue InFlag, + const AAMDNodes &AAInfo, uint64_t Offset) const { + const MachineFunction &MF = DAG.getMachineFunction(); + const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + + Type *EltTy = Ty; + if (Ty->isArrayTy()) + EltTy = Ty->getArrayElementType(); + EltTy = EltTy->getScalarType(); + MVT PtrVT = getPointerTy(DL, AddressSpace); + PointerType *ArgPT = PointerType::get(EltTy, AddressSpace); + // TODO_HSA: check if that works with packed structs, it can happen + // we would need to inhibit alignment calculation in that case. 
+ Offset += DL.getTypeStoreSize(EltTy) * Index; + + EVT MemVT = ArgVT; + + if (ArgVT == MVT::i1) { + MemVT = MVT::i8; + Value = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Value); + } + + SDValue PtrOffs = + DAG.getNode(ISD::ADD, SL, PtrVT, Ptr, DAG.getConstant(Offset, SL, PtrVT)); + + unsigned Align = getElementAlignment(DL, Ty, Index); + // TODO_HSA: Due to problems with RT alignment of vectors we have to + // use element size instead of vector size for alignment. + // Fix when RT is fixed. + if (AddressSpace == HSAILAS::KERNARG_ADDRESS) + Align = DL.getABITypeAlignment(EltTy); + + SDValue Ops[] = {Chain, Value, PtrOffs, InFlag}; + + ArrayRef OpsArr = makeArrayRef(Ops); + if (!InFlag) + OpsArr = OpsArr.drop_back(1); + + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + MachinePointerInfo PtrInfo(UndefValue::get(ArgPT), Offset); + + return DAG.getMemIntrinsicNode(HSAILISD::ARG_ST, SL, VTs, OpsArr, MemVT, + PtrInfo, Align, + false, // isVolatile + false, // ReadMem + true, // WriteMem + MemVT.getStoreSize()); +} + +/// Recursively lower a single argument or its element. +/// Either Ins or Outs must non-zero, which means we are doing argument load +/// or store. +/// ArgNo is an index to InVals and OutVals, which is advanced after the call. +/// AS is an address space of argument, either arg or kernarg +/// ParamPtr is a pointer value for argument to load from or store to. +/// Offset is a value which has to be added to the pointer. +/// If InFlag gis present lue all operations. +/// If ChainLink is true chain link all operations. +/// Returns last operation value. +SDValue HSAILTargetLowering::LowerArgument( + SDValue Chain, SDValue InFlag, bool ChainLink, + const SmallVectorImpl *Ins, + const SmallVectorImpl *Outs, SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl *InVals, unsigned &ArgNo, Type *type, unsigned AS, + const char *ParamName, SDValue ParamPtr, + const SmallVectorImpl *OutVals, bool isRetArgLoad, + const AAMDNodes &AAInfo, uint64_t offset) const { + assert((Ins == nullptr && Outs != nullptr) || (Ins != nullptr && Outs == nullptr)); + + const MachineFunction &MF = DAG.getMachineFunction(); + const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + + Type *sType = type->getScalarType(); + + EVT argVT = Ins ? (*Ins)[ArgNo].VT : (*Outs)[ArgNo].VT; + + if (sType->isIntegerTy(8)) + argVT = MVT::i8; + else if (sType->isIntegerTy(16)) + argVT = MVT::i16; + + bool isLoad = Ins != nullptr; + bool hasFlag = InFlag.getNode() != nullptr; + SDValue ArgValue; + + const VectorType *VecTy = dyn_cast(type); + const ArrayType *ArrTy = dyn_cast(type); + if (VecTy || ArrTy) { + // This assumes that char and short vector elements are unpacked in Ins. + unsigned num_elem = + VecTy ? VecTy->getNumElements() : ArrTy->getNumElements(); + for (unsigned i = 0; i < num_elem; ++i) { + if (isLoad) { + bool IsSExt = (*Ins)[ArgNo].Flags.isSExt(); + ArgValue = getArgLoad(DAG, dl, argVT, type, IsSExt, AS, Chain, ParamPtr, + InFlag, i, isRetArgLoad, offset); + } else { + ArgValue = getArgStore(DAG, dl, argVT, type, AS, Chain, ParamPtr, + (*OutVals)[ArgNo], i, InFlag, AAInfo, offset); + } + + if (ChainLink) + Chain = ArgValue.getValue(isLoad ? 1 : 0); + + // Glue next vector loads regardless of input flag to favor vectorization. + InFlag = ArgValue.getValue(isLoad ? 
2 : 1); + if (InVals) + InVals->push_back(ArgValue); + ArgNo++; + } + + return ArgValue; + } + + if (StructType *STy = dyn_cast(type)) { + const StructLayout *SL = DL.getStructLayout(STy); + unsigned num_elem = STy->getNumElements(); + for (unsigned i = 0; i < num_elem; ++i) { + ArgValue = LowerArgument(Chain, InFlag, ChainLink, Ins, Outs, dl, DAG, + InVals, ArgNo, STy->getElementType(i), AS, + ParamName, ParamPtr, OutVals, isRetArgLoad, + AAInfo, offset + SL->getElementOffset(i)); + if (ChainLink) + Chain = ArgValue.getValue(isLoad ? 1 : 0); + + if (hasFlag) + InFlag = ArgValue.getValue(isLoad ? 2 : 1); + } + return ArgValue; + } + + // Regular scalar load case. + if (isLoad) { + bool IsSExt = (*Ins)[ArgNo].Flags.isSExt(); + ArgValue = getArgLoad(DAG, dl, argVT, type, IsSExt, AS, Chain, ParamPtr, + InFlag, 0, isRetArgLoad, offset); + + } else { + ArgValue = getArgStore(DAG, dl, argVT, type, AS, Chain, ParamPtr, + (*OutVals)[ArgNo], 0, InFlag, AAInfo, offset); + } + + if (InVals) + InVals->push_back(ArgValue); + ArgNo++; + + return ArgValue; +} + +SDValue HSAILTargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + HSAILMachineFunctionInfo *FuncInfo = MF.getInfo(); + HSAILParamManager &PM = FuncInfo->getParamManager(); + unsigned AS = HSAIL::isKernelFunc(MF.getFunction()) ? HSAILAS::KERNARG_ADDRESS + : HSAILAS::ARG_ADDRESS; + const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + + MVT PtrTy = getPointerTy(DL, AS); + Mangler Mang; + + // Map function param types to Ins. + Function::const_arg_iterator AI = MF.getFunction()->arg_begin(); + Function::const_arg_iterator AE = MF.getFunction()->arg_end(); + for (unsigned ArgNo = 0; AI != AE; ++AI) { + unsigned Param = PM.addArgumentParam( + AS, *AI, HSAILParamManager::mangleArg(&Mang, AI->getName(), DL)); + const char *ParamName = PM.getParamName(Param); + std::string md = (AI->getName() + ":" + ParamName + " ").str(); + FuncInfo->addMetadata("argmap:" + md, true); + SDValue ParamPtr = DAG.getTargetExternalSymbol(ParamName, PtrTy); + + // FIXME: What is this for? + // Value *mdops[] = { const_cast(&(*AI)) }; + // MDNode *ArgMD = MDNode::get(MF.getFunction()->getContext(), mdops); + + LowerArgument(Chain, SDValue(), false, &Ins, nullptr, dl, DAG, &InVals, ArgNo, + AI->getType(), AS, ParamName, ParamPtr, nullptr); + } + + return Chain; +} + +static BrigType getParamBrigType(Type *Ty, const DataLayout &DL, bool IsSExt) { + BrigType BT = HSAIL::getBrigType(Ty, DL, IsSExt); + if (BT == BRIG_TYPE_B1) { + assert(!IsSExt && "When does this happen?"); + return IsSExt ? 
BRIG_TYPE_S8 : BRIG_TYPE_U8; + } + + return BT; +} + +SDValue HSAILTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &dl = CLI.DL; + SmallVector &Outs = CLI.Outs; + SmallVector &OutVals = CLI.OutVals; + SmallVector &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + CLI.IsTailCall = false; + + MachineFunction &MF = DAG.getMachineFunction(); + HSAILParamManager &PM = + MF.getInfo()->getParamManager(); + + const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout(); + + Mangler Mang; + + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + SDValue InFlag = Chain.getValue(1); + + const FunctionType *funcType = nullptr; + const Function *calleeFunc = nullptr; + const char *FuncName = nullptr; + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, + // every direct call is) turn it into a TargetGlobalAddress/ + // TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + unsigned AS = G->getAddressSpace(); + const GlobalValue *GV = G->getGlobal(); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(DL, AS)); + + if (const GlobalAlias *GA = dyn_cast(GV)) + calleeFunc = cast(GA->getAliasee()); + else + calleeFunc = cast(GV); + + funcType = calleeFunc->getFunctionType(); + FuncName = GV->getName().data(); + } else + llvm_unreachable( + "Cannot lower call to a function which is not a global address"); + + assert(funcType != nullptr); + + SmallVector Ops; + SmallVector VarOps; + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + + Type *retType = funcType->getReturnType(); + SDValue RetValue; + if (!retType->isVoidTy()) { + MVT PtrVT = getPointerTy(DL, HSAILAS::ARG_ADDRESS); + RetValue = DAG.getTargetExternalSymbol( + PM.getParamName( + PM.addCallRetParam(retType, PM.mangleArg(&Mang, FuncName, DL))), + PtrVT); + + unsigned NElts; + Type *EmitTy = HSAIL::analyzeType(retType, NElts, DL); + + BrigType BT = getParamBrigType(EmitTy, DL, CLI.RetSExt); + + unsigned Align = HSAIL::getAlignTypeQualifier(retType, DL, false); + + const SDValue ArgDeclOps[] = { + RetValue, + DAG.getTargetConstant(BT, dl, MVT::i32), + DAG.getTargetConstant(NElts, dl, PtrVT), + DAG.getTargetConstant(Align, dl, MVT::i32), + Chain, + InFlag + }; + + SDNode *ArgDeclNode = + DAG.getMachineNode(HSAIL::ARG_DECL, dl, VTs, ArgDeclOps); + + SDValue ArgDecl(ArgDeclNode, 0); + + Chain = SDValue(ArgDeclNode, 0); + InFlag = Chain.getValue(1); + + VarOps.push_back(RetValue); + } + + // Delimit return value and parameters with 0 + VarOps.push_back(DAG.getTargetConstant(0, dl, MVT::i32)); + unsigned FirstArg = VarOps.size(); + + unsigned int j = 0, k = 0; + Function::const_arg_iterator ai; + Function::const_arg_iterator ae; + if (calleeFunc) { + ai = calleeFunc->arg_begin(); + ae = calleeFunc->arg_end(); + } + + MVT ArgPtrVT = getPointerTy(DL, HSAILAS::ARG_ADDRESS); + + MDBuilder MDB(*DAG.getContext()); + for (FunctionType::param_iterator pb = funcType->param_begin(), + pe = funcType->param_end(); + pb != pe; ++pb, ++ai, ++k) { + Type *type = *pb; + + std::string ParamName; + if (calleeFunc && ai != ae) { + ParamName = PM.mangleArg(&Mang, ai->getName(), DL); + } + if (ParamName.empty()) { + ParamName = "__param_p"; + ParamName.append(itostr(k)); + } + SDValue StParamValue = DAG.getTargetExternalSymbol( + PM.getParamName(PM.addCallArgParam(type, ParamName)), ArgPtrVT); + + unsigned NElts; + Type *EmitTy = 
HSAIL::analyzeType(type, NElts, DL); + + // START array parameter declaration + BrigType BT = getParamBrigType(EmitTy, DL, Outs[j].Flags.isSExt()); + + unsigned Align = HSAIL::getAlignTypeQualifier(type, DL, false); + const SDValue ArgDeclOps[] = { + StParamValue, + DAG.getTargetConstant(BT, dl, MVT::i32), + DAG.getTargetConstant(NElts, dl, ArgPtrVT), + DAG.getTargetConstant(Align, dl, MVT::i32), + Chain, + InFlag + }; + + SDNode *ArgDeclNode = + DAG.getMachineNode(HSAIL::ARG_DECL, dl, VTs, ArgDeclOps); + Chain = SDValue(ArgDeclNode, 0); + InFlag = Chain.getValue(1); + + // END array parameter declaration + VarOps.push_back(StParamValue); + + for (; j < Outs.size() - 1; j++) { + if (Outs[j].OrigArgIndex != Outs[j + 1].OrigArgIndex) + break; + } + j++; + } + + j = k = 0; + for (FunctionType::param_iterator pb = funcType->param_begin(), + pe = funcType->param_end(); + pb != pe; ++pb, ++k) { + Type *type = *pb; + Chain = LowerArgument(Chain, InFlag, true, nullptr, &Outs, dl, DAG, nullptr, j, + type, HSAILAS::ARG_ADDRESS, nullptr, + VarOps[FirstArg + k], &OutVals); + InFlag = Chain.getValue(1); + } + + // If this is a direct call, pass the chain and the callee + if (Callee.getNode()) { + Ops.push_back(Callee); + } + + // Add actual arguments to the end of the list + for (unsigned int i = 0, e = VarOps.size(); i != e; ++i) { + Ops.push_back(VarOps[i]); + } + + Ops.push_back(Chain); + Ops.push_back(InFlag); + + SDNode *Call = DAG.getMachineNode(HSAIL::CALL, dl, VTs, Ops); + Chain = SDValue(Call, 0); + + InFlag = Chain.getValue(1); + + // Read return value. + if (!Ins.empty()) { + j = 0; + Chain = LowerArgument(Chain, InFlag, true, &Ins, nullptr, dl, DAG, &InVals, j, + retType, HSAILAS::ARG_ADDRESS, nullptr, RetValue, nullptr, + true); + InFlag = Chain.getValue(2); + Chain = Chain.getValue(1); + } + + // Create the CALLSEQ_END node + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), InFlag, dl); + return Chain; +} + +#define LOWER(A) \ + case ISD::A: \ + return Lower##A(Op, DAG) + +SDValue HSAILTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + LOWER(INTRINSIC_WO_CHAIN); + LOWER(INTRINSIC_W_CHAIN); + LOWER(ROTL); + LOWER(ROTR); + LOWER(BSWAP); + LOWER(ADD); + LOWER(LOAD); + LOWER(STORE); + LOWER(ATOMIC_LOAD); + LOWER(ATOMIC_STORE); + break; + default: + Op.getNode()->dump(); + assert(0 && "Custom lowering code for this" + "instruction is not implemented yet!"); + break; + } + return Op; +} + +/// ReplaceNodeResults - This callback is invoked when a node result type is +/// illegal for the target, and the operation was registered to use 'custom' +/// lowering for that result type. The target places new result values for +/// the node in Results (their number and types must exactly match those of +/// the original return values of the node), or leaves Results empty, which +/// indicates that the node is not to be custom lowered after all. +/// +/// If the target has no operations that require custom lowering, it need not +/// implement this. The default implementation aborts. +void HSAILTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG) const { + return TargetLowering::ReplaceNodeResults(N, Results, DAG); +} + +/// getTargetNodeName() - This method returns the name of a target specific +/// DAG node. 
+const char *HSAILTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: + llvm_unreachable("Unknown target-node"); + return nullptr; + case HSAILISD::CALL: + return "HSAILISD::CALL"; + case HSAILISD::RET: + return "HSAILISD::RET"; + case HSAILISD::SMAX: + return "HSAILISD::SMAX"; + case HSAILISD::UMAX: + return "HSAILISD::UMAX"; + case HSAILISD::SMIN: + return "HSAILISD::SMIN"; + case HSAILISD::UMIN: + return "HSAILISD::UMIN"; + case HSAILISD::FRACT: + return "HSAILISD::FRACT"; + case HSAILISD::NFMA: + return "HSAILISD::NFMA"; + case HSAILISD::UMAD: + return "HSAILISD::UMAD"; + case HSAILISD::SMAD: + return "HSAILISD::SMAD"; + case HSAILISD::UMUL24: + return "HSAILISD::UMUL24"; + case HSAILISD::SMUL24: + return "HSAILISD::SMUL24"; + case HSAILISD::UMAD24: + return "HSAILISD::UMAD24"; + case HSAILISD::SMAD24: + return "HSAILISD::SMAD24"; + case HSAILISD::BITSELECT: + return "HSAILISD::BITSELECT"; + case HSAILISD::SBITEXTRACT: + return "HSAILISD::SBITEXTRACT"; + case HSAILISD::UBITEXTRACT: + return "HSAILISD::UBITEXTRACT"; + case HSAILISD::FLDEXP: + return "HSAILISD::FLDEXP"; + case HSAILISD::CLASS: + return "HSAILISD::CLASS"; + case HSAILISD::LDA: + return "HSAILISD::LDA"; + case HSAILISD::ACTIVELANEPERMUTE: + return "HSAILISD::ACTIVELANEPERMUTE"; + case HSAILISD::ACTIVELANEID: + return "HSAILISD::ACTIVELANEID"; + case HSAILISD::ACTIVELANECOUNT: + return "HSAILISD::ACTIVELANECOUNT"; + case HSAILISD::ACTIVELANEMASK: + return "HSAILISD::ACTIVELANEMASK"; + case HSAILISD::KERNARGBASEPTR: + return "HSAILISD::KERNARGBASEPTR"; + case HSAILISD::SEGMENTP: + return "HSAILISD::SEGMENTP"; + case HSAILISD::ARG_LD: + return "HSAILISD::ARG_LD"; + case HSAILISD::ARG_ST: + return "HSAILISD::ARG_ST"; + } +} + +//===--------------------------------------------------------------------===// +// Custom lowering methods +// + +SDValue HSAILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const { + SDLoc dl = SDLoc(Op); + EVT VT = Op.getValueType(); + + if (VT != MVT::i1) { + return Op; + } + const SDValue src = Op.getOperand(0).getOperand(0); + EVT srcVT = src.getValueType(); + if (Op.getOperand(0).getOpcode() != ISD::TRUNCATE) + return Op; + + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Op.getOperand(1)); + SDValue Zext1 = DAG.getNode(ISD::ZERO_EXTEND, dl, srcVT, Op.getOperand(0)); + SDValue add_p = DAG.getNode(ISD::ADD, dl, srcVT, Zext1, Zext); + SDValue Zext2 = DAG.getNode(ISD::TRUNCATE, dl, VT, add_p); + return Zext2; +} + +static bool isRdimage(unsigned IntNo) { + switch (IntNo) { + case HSAILIntrinsic::HSAIL_rd_imgf_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_1d_f32: + // read image 1d array + case HSAILIntrinsic::HSAIL_rd_imgf_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_1da_f32: + // read image 2d + case HSAILIntrinsic::HSAIL_rd_imgf_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_2d_f32: + // read image 2d array + case HSAILIntrinsic::HSAIL_rd_imgf_2da_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2da_f32: + case 
HSAILIntrinsic::HSAIL_rd_imgi_2da_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgi_2da_f32:
+  case HSAILIntrinsic::HSAIL_rd_imgui_2da_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgui_2da_f32:
+  // read image 3d
+  case HSAILIntrinsic::HSAIL_rd_imgf_3d_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgf_3d_f32:
+  case HSAILIntrinsic::HSAIL_rd_imgi_3d_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgi_3d_f32:
+  case HSAILIntrinsic::HSAIL_rd_imgui_3d_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgui_3d_f32:
+  // read image 2d depth
+  case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_f32:
+  // read image 2d array depth
+  case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_s32:
+  case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_f32:
+    return true;
+  }
+
+  return false;
+}
+
+SDValue HSAILTargetLowering::LowerLdKernargIntrinsic(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  HSAILMachineFunctionInfo *FuncInfo = MF.getInfo<HSAILMachineFunctionInfo>();
+  HSAILParamManager &PM = FuncInfo->getParamManager();
+  const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();
+
+  EVT VT = Op.getValueType();
+  Type *Ty = Type::getIntNTy(*DAG.getContext(), VT.getSizeInBits());
+  SDValue Addr = Op.getOperand(1);
+  int64_t Offset = 0;
+  MVT PtrTy = getPointerTy(DL, HSAILAS::KERNARG_ADDRESS);
+  AAMDNodes ArgMD; // FIXME: What is this for?
+  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
+    Offset = CAddr->getSExtValue();
+    // Match a constant address argument to the parameter through the
+    // function's argument map (taking argument alignment into account).
+    // A match is not possible if we are accessing beyond the known kernel
+    // argument space, if we are accessing from a non-inlined function, or if
+    // there is an opaque argument with unknown size before the requested
+    // offset.
+    unsigned Param = UINT_MAX;
+    if (HSAIL::isKernelFunc(MF.getFunction()))
+      Param = PM.getParamByOffset(Offset);
+
+    if (Param != UINT_MAX) {
+      Addr = DAG.getTargetExternalSymbol(PM.getParamName(Param), PtrTy);
+      // Value *mdops[] = { const_cast(PM.getParamArg(param)) };
+      // ArgMD = MDNode::get(MF.getFunction()->getContext(), mdops);
+    } else {
+      Addr = SDValue();
+    }
+  }
+
+  SDValue Chain = DAG.getEntryNode();
+  return getArgLoad(DAG, SDLoc(Op), VT, Ty, false, HSAILAS::KERNARG_ADDRESS,
+                    Chain, Addr, SDValue(), 0, false, Offset);
+}
+
+SDValue HSAILTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  unsigned IntrID = cast<ConstantSDNode>(Op->getOperand(0))->getZExtValue();
+  SDLoc SL(Op);
+
+  // FIXME: This is for compatibility with old, custom HSAIL intrinsics. These
+  // should be removed once users are updated to use the LLVM intrinsics.
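Most arms of the switch that follows share one shape: drop the intrinsic-ID operand and rebuild the node as a generic ISD or HSAILISD opcode with the same value type. A minimal sketch of that recurring pattern; the helper name expandBinaryIntrinsic is hypothetical and not part of the patch:

    // Sketch only. Operand 0 of an INTRINSIC_WO_CHAIN node is the intrinsic ID,
    // so the real arguments start at operand 1; they are simply re-wrapped in
    // the opcode the rest of the backend understands.
    static SDValue expandBinaryIntrinsic(SDValue Op, unsigned NewOpcode,
                                         SelectionDAG &DAG) {
      SDLoc SL(Op);
      return DAG.getNode(NewOpcode, SL, Op.getValueType(),
                         Op.getOperand(1), Op.getOperand(2));
    }

The HSAIL_mulhi_u32 case below, for instance, is equivalent to expandBinaryIntrinsic(Op, ISD::MULHU, DAG).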
+ switch (IntrID) { + case HSAILIntrinsic::HSAIL_get_global_id: { + ConstantSDNode *Dim = dyn_cast(Op.getOperand(1)); + if (!Dim || Dim->getZExtValue() > 2) + return DAG.getUNDEF(Op.getValueType()); + return Op; + } + + case HSAILIntrinsic::HSAIL_abs_f32: + return DAG.getNode(ISD::FABS, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_abs_f64: + return DAG.getNode(ISD::FABS, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_rnd_f32: + return DAG.getNode(ISD::FRINT, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_rnd_f64: + return DAG.getNode(ISD::FRINT, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_floor_f32: + return DAG.getNode(ISD::FFLOOR, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_floor_f64: + return DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_ceil_f32: + return DAG.getNode(ISD::FCEIL, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_ceil_f64: + return DAG.getNode(ISD::FCEIL, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_trunc_f32: + return DAG.getNode(ISD::FTRUNC, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_trunc_f64: + return DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_fract_f32: + return DAG.getNode(HSAILISD::FRACT, SL, MVT::f32, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_fract_f64: + return DAG.getNode(HSAILISD::FRACT, SL, MVT::f64, Op.getOperand(1)); + + case HSAILIntrinsic::HSAIL_copysign_f32: + return DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_copysign_f64: + return DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_f32: + return DAG.getNode(ISD::FMINNUM, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_f64: + return DAG.getNode(ISD::FMINNUM, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_f32: + return DAG.getNode(ISD::FMAXNUM, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_f64: + return DAG.getNode(ISD::FMAXNUM, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_fma_f32: + return DAG.getNode(ISD::FMA, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_fma_f64: + return DAG.getNode(ISD::FMA, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_popcount_u32_b32: + return DAG.getNode(ISD::CTPOP, SL, MVT::i32, Op.getOperand(1)); + + case Intrinsic::hsail_nfma: + case HSAILIntrinsic::HSAIL_nfma_f32: + case HSAILIntrinsic::HSAIL_nfma_f64: + return DAG.getNode(HSAILISD::NFMA, SL, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::hsail_bitselect: + case HSAILIntrinsic::HSAIL_bitselect_u32: + case HSAILIntrinsic::HSAIL_bitselect_u64: + return DAG.getNode(HSAILISD::BITSELECT, SL, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::hsail_ubitextract: + case HSAILIntrinsic::HSAIL_bfe: + return DAG.getNode(HSAILISD::UBITEXTRACT, SL, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::hsail_sbitextract: + case HSAILIntrinsic::HSAIL_ibfe: + return DAG.getNode(HSAILISD::SBITEXTRACT, SL, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2), 
Op.getOperand(3)); + + // FIXME: There should be LLVM intrinsics for mulhs / mulhu. + case Intrinsic::hsail_smulhi: + case HSAILIntrinsic::HSAIL_mulhi_s32: + case HSAILIntrinsic::HSAIL_mulhi_s64: + return DAG.getNode(ISD::MULHS, SL, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::hsail_umulhi: + case HSAILIntrinsic::HSAIL_mulhi_u32: + case HSAILIntrinsic::HSAIL_mulhi_u64: + return DAG.getNode(ISD::MULHU, SL, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_mad_u64: + return DAG.getNode(HSAILISD::UMAD, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_mad_u32: + return DAG.getNode(HSAILISD::UMAD, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_max_s32: + return DAG.getNode(HSAILISD::SMAX, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_u32: + return DAG.getNode(HSAILISD::UMAX, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_s32: + return DAG.getNode(HSAILISD::SMIN, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_u32: + return DAG.getNode(HSAILISD::UMIN, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_s64: + return DAG.getNode(HSAILISD::SMAX, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_max_u64: + return DAG.getNode(HSAILISD::UMAX, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_s64: + return DAG.getNode(HSAILISD::SMIN, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_min_u64: + return DAG.getNode(HSAILISD::UMIN, SL, MVT::i64, Op.getOperand(1), + Op.getOperand(2)); + + case Intrinsic::hsail_smul24: + case HSAILIntrinsic::HSAIL_mul24_s32: + return DAG.getNode(HSAILISD::SMUL24, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case Intrinsic::hsail_umul24: + case HSAILIntrinsic::HSAIL_mul24_u32: + return DAG.getNode(HSAILISD::UMUL24, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2)); + + case Intrinsic::hsail_smad24: + case HSAILIntrinsic::HSAIL_mad24_s32: + return DAG.getNode(HSAILISD::SMAD24, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::hsail_umad24: + case HSAILIntrinsic::HSAIL_mad24_u32: + return DAG.getNode(HSAILISD::UMAD24, SL, MVT::i32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + + case HSAILIntrinsic::HSAIL_gcn_fldexp_f32: + return DAG.getNode(HSAILISD::FLDEXP, SL, MVT::f32, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_gcn_fldexp_f64: + return DAG.getNode(HSAILISD::FLDEXP, SL, MVT::f64, Op.getOperand(1), + Op.getOperand(2)); + + case HSAILIntrinsic::HSAIL_class_f32: + case HSAILIntrinsic::HSAIL_class_f64: { + // FIXME: The intrinsic should be i1 to begin with. 
+ SDValue Class = DAG.getNode(HSAILISD::CLASS, SL, MVT::i1, Op.getOperand(1), + Op.getOperand(2)); + return DAG.getNode(ISD::SIGN_EXTEND, SL, MVT::i32, Class); + } + + case Intrinsic::hsail_segmentp: { + unsigned AS = cast(Op.getOperand(1))->getZExtValue(); + unsigned NoNull = cast(Op.getOperand(2))->getZExtValue(); + return DAG.getNode( + HSAILISD::SEGMENTP, SL, MVT::i1, + DAG.getTargetConstant(AS, SL, MVT::i32), + DAG.getTargetConstant(NoNull, SL, MVT::i1), + Op.getOperand(3)); + } + case HSAILIntrinsic::HSAIL_segmentp_global: { + return DAG.getNode( + HSAILISD::SEGMENTP, SL, MVT::i1, + DAG.getTargetConstant(HSAILAS::GLOBAL_ADDRESS, SL, MVT::i32), + DAG.getTargetConstant(0, SL, MVT::i1), Op.getOperand(1)); + } + case HSAILIntrinsic::HSAIL_segmentp_local: { + return DAG.getNode( + HSAILISD::SEGMENTP, SL, MVT::i1, + DAG.getTargetConstant(HSAILAS::GROUP_ADDRESS, SL, MVT::i32), + DAG.getTargetConstant(0, SL, MVT::i1), Op.getOperand(1)); + } + case HSAILIntrinsic::HSAIL_segmentp_private: { + return DAG.getNode( + HSAILISD::SEGMENTP, SL, MVT::i1, + DAG.getTargetConstant(HSAILAS::PRIVATE_ADDRESS, SL, MVT::i32), + DAG.getTargetConstant(0, SL, MVT::i1), Op.getOperand(1)); + } + case Intrinsic::hsail_kernargbaseptr: { + return DAG.getNode(HSAILISD::KERNARGBASEPTR, SL, Op.getValueType()); + } + case HSAILIntrinsic::HSAIL_ld_kernarg_u32: + case HSAILIntrinsic::HSAIL_ld_kernarg_u64: { + return LowerLdKernargIntrinsic(Op, DAG); + } + default: + return Op; + } +} + +SDValue HSAILTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = cast(Op->getOperand(1))->getZExtValue(); + SDLoc SL(Op); + + if (isRdimage(IntNo)) + return lowerSamplerInitializerOperand(Op, DAG); + + switch (IntNo) { + case Intrinsic::hsail_activelanepermute: { + const ConstantSDNode *Width = cast(Op.getOperand(2)); + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(Width->getZExtValue(), SL, MVT::i32), // width + Op.getOperand(3), // src0 + Op.getOperand(4), // src1 + Op.getOperand(5), // src2 + Op.getOperand(6) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, Op->getVTList(), Ops); + } + + case HSAILIntrinsic::HSAIL_activelanepermute_b32: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32), // width + Op.getOperand(2), // src0 + Op.getOperand(3), // src1 + Op.getOperand(4), // src2 + Op.getOperand(5) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanepermute_b64: { + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32), // width + Op.getOperand(2), // src0 + Op.getOperand(3), // src1 + Op.getOperand(4), // src2 + Op.getOperand(5) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanepermute_width_b32: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32), // width + Op.getOperand(2), // src0 + Op.getOperand(3), // src1 + Op.getOperand(4), // src2 + Op.getOperand(5) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanepermute_width_b64: { + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + + 
const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32), // width + Op.getOperand(2), // src0 + Op.getOperand(3), // src1 + Op.getOperand(4), // src2 + Op.getOperand(5) // src3 + }; + + return DAG.getNode(HSAILISD::ACTIVELANEPERMUTE, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelaneid_u32: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32) // width + }; + + return DAG.getNode(HSAILISD::ACTIVELANEID, SL, VTs, Ops); + } + + case Intrinsic::hsail_activelaneid: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + const ConstantSDNode *Width = cast(Op.getOperand(2)); + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(Width->getZExtValue(), SL, MVT::i32) // width + }; + + return DAG.getNode(HSAILISD::ACTIVELANEID, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelaneid_width_u32: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32) // width + }; + + return DAG.getNode(HSAILISD::ACTIVELANEID, SL, VTs, Ops); + } + + case Intrinsic::hsail_activelanecount: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + const ConstantSDNode *Width = cast(Op.getOperand(2)); + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(Width->getZExtValue(), SL, MVT::i32), // width + Op.getOperand(3) + }; + + return DAG.getNode(HSAILISD::ACTIVELANECOUNT, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanecount_u32_b1: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32), // width + Op.getOperand(2) + }; + + return DAG.getNode(HSAILISD::ACTIVELANECOUNT, SL, VTs, Ops); + } + + case HSAILIntrinsic::HSAIL_activelanecount_width_u32_b1: { + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); + + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32), // width + Op.getOperand(2) + }; + + return DAG.getNode(HSAILISD::ACTIVELANECOUNT, SL, VTs, Ops); + } + + case Intrinsic::hsail_activelanemask: { + const ConstantSDNode *Width = cast(Op.getOperand(2)); + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(Width->getZExtValue(), SL, MVT::i32), // width + Op.getOperand(3) + }; + + return DAG.getNode(HSAILISD::ACTIVELANEMASK, SL, Op->getVTList(), Ops); + } + + case HSAILIntrinsic::HSAIL_activelanemask_v4_b64_b1: { + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_1, SL, MVT::i32), // width + Op.getOperand(2) + }; + + return DAG.getNode(HSAILISD::ACTIVELANEMASK, SL, Op->getVTList(), Ops); + } + + case HSAILIntrinsic::HSAIL_activelanemask_v4_width_b64_b1: { + const SDValue Ops[] = { + Op.getOperand(0), // Chain + DAG.getTargetConstant(BRIG_WIDTH_WAVESIZE, SL, MVT::i32), // width + Op.getOperand(2) + }; + + return DAG.getNode(HSAILISD::ACTIVELANEMASK, SL, Op->getVTList(), Ops); + } + + default: + return Op; + } +} + +/// \brief Replace sampler initializer with sampler handle from +/// readonly segment, potentially creating a new handle. 
+SDValue
+HSAILTargetLowering::lowerSamplerInitializerOperand(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  const unsigned SAMPLER_ARG = 3;
+  SDValue sampler = Op.getOperand(SAMPLER_ARG);
+
+  // The sampler operand is an initializer if it is constant and less than
+  // IMAGE_ARG_BIAS.
+  if (!isa<ConstantSDNode>(sampler))
+    return Op;
+
+  unsigned samplerConstant = cast<ConstantSDNode>(sampler)->getZExtValue();
+  if (samplerConstant >= IMAGE_ARG_BIAS)
+    return Op;
+
+  // This is a sampler initializer.
+  // Find or create a sampler handle based on the initializer value.
+  unsigned samplerHandleIndex =
+      Subtarget->getImageHandles()->findOrCreateSamplerHandle(samplerConstant);
+
+  // Since this is a plain integer constant we can assume it is not going to
+  // change, so we use the readonly segment for the sampler.
+  // According to the OpenCL spec samplers cannot be modified, so that is safe
+  // for OpenCL. If we ever need to support modifiable or non-OpenCL samplers,
+  // most likely the whole support code will need to change.
+  Subtarget->getImageHandles()->getSamplerHandle(samplerHandleIndex)->setRO();
+
+  SDValue ops[16];
+  for (unsigned i = 0; i < Op.getNumOperands(); i++) {
+    ops[i] = Op.getOperand(i);
+  }
+
+  SDLoc SL(Op);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();
+
+  // FIXME: Get correct address space pointer type.
+  SDValue Ops[] = {
+    DAG.getTargetConstant(samplerHandleIndex, SL, MVT::i32),
+    DAG.getRegister(HSAIL::NoRegister, getPointerTy(DL)),
+    DAG.getTargetConstant(0, SL, MVT::i32),
+    DAG.getTargetConstant(BRIG_TYPE_SAMP, SL, MVT::i32),
+    DAG.getTargetConstant(BRIG_WIDTH_ALL, SL, MVT::i32),
+    DAG.getTargetConstant(1, SL, MVT::i1), // Const
+    DAG.getEntryNode() // Chain
+  };
+
+  EVT VT = sampler.getValueType();
+
+  // Don't use ptr32 since this is the readonly segment.
+  MachineSDNode *LDSamp =
+      DAG.getMachineNode(HSAIL::LD_SAMP, SDLoc(Op), VT, MVT::Other, Ops);
+
+  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+  unsigned size = VT.getStoreSize();
+  Type *PTy = VT.getTypeForEVT(*DAG.getContext());
+  PointerType *PT = PointerType::get(PTy, HSAILAS::READONLY_ADDRESS);
+  MachinePointerInfo MPtrInfo(UndefValue::get(PT), size * samplerHandleIndex);
+  MemOp[0] =
+      MF.getMachineMemOperand(MPtrInfo, MachineMemOperand::MOLoad, size, size);
+  LDSamp->setMemRefs(MemOp, MemOp + 1);
+
+  ops[SAMPLER_ARG] = SDValue(LDSamp, 0);
+
+  DAG.UpdateNodeOperands(Op.getNode(), makeArrayRef(ops, Op.getNumOperands()));
+
+  return Op;
+}
+
+SDValue HSAILTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl = SDLoc(Op);
+  EVT VT = Op.getValueType();
+
+  if (VT != MVT::i32) {
+    return Op;
+  }
+
+  SDValue IntrID
+    = DAG.getTargetConstant(HSAILIntrinsic::HSAIL_bitalign_b32, dl, MVT::i32);
+  const SDValue src0 = Op.getOperand(0);
+  const SDValue src1 = Op.getOperand(1);
+  const ConstantSDNode *shift = dyn_cast<ConstantSDNode>(src1);
+  return DAG.getNode(
+      ISD::INTRINSIC_WO_CHAIN, dl, VT,
+      IntrID,
+      src0, src0,
+      shift ?
DAG.getConstant(32 - (shift->getZExtValue() & 31), dl, MVT::i32)
+            : DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), src1));
+}
+
+SDValue HSAILTargetLowering::LowerROTR(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl = SDLoc(Op);
+  EVT VT = Op.getValueType();
+
+  if (VT != MVT::i32) {
+    return Op;
+  }
+  const SDValue src0 = Op.getOperand(0);
+  const SDValue src1 = Op.getOperand(1);
+  SDValue IntrID
+    = DAG.getTargetConstant(HSAILIntrinsic::HSAIL_bitalign_b32, dl, MVT::i32);
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, IntrID, src0, src0, src1);
+}
+
+SDValue HSAILTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl = SDLoc(Op);
+  EVT VT = Op.getValueType();
+
+  if (VT != MVT::i32) {
+    return Op;
+  }
+  const SDValue src = Op.getOperand(0);
+  const SDValue opr0 = DAG.getNode(
+      ISD::INTRINSIC_WO_CHAIN, dl, VT,
+      DAG.getConstant(HSAILIntrinsic::HSAIL_bytealign_b32, dl, MVT::i32),
+      src, src, DAG.getConstant(3, dl, MVT::i32));
+  const SDValue opr1 = DAG.getNode(
+      ISD::INTRINSIC_WO_CHAIN, dl, VT,
+      DAG.getConstant(HSAILIntrinsic::HSAIL_bytealign_b32, dl, MVT::i32),
+      src, src, DAG.getConstant(1, dl, MVT::i32));
+  return DAG.getNode(
+      ISD::INTRINSIC_WO_CHAIN, dl, VT,
+      DAG.getTargetConstant(HSAILIntrinsic::HSAIL_bitselect_u32, dl, MVT::i32),
+      DAG.getConstant(0x00ff00ff, dl, VT), opr0, opr1);
+}
+
+SDValue HSAILTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+
+  EVT VT = Op.getValueType();
+  LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+  if (VT.getSimpleVT() == MVT::i1) {
+    // Since there are no 1-bit load operations, such loads are converted to
+    // 8-bit loads.
+    // First, do an 8-bit load into 32 bits with zero extension, then
+    // truncate to 1 bit.
+    SDValue NewLD =
+        DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, LD->getChain(),
+                       LD->getBasePtr(), MVT::i8, LD->getMemOperand());
+
+    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+    SDValue Ops[] = {Result, NewLD.getValue(1)};
+
+    return DAG.getMergeValues(Ops, dl);
+  }
+
+  // Custom lowering for extload from sub-dword size to i64. We only
+  // do it because LLVM currently does not support Expand for EXTLOAD
+  // with illegal types.
+  // See "EXTLOAD should always be supported!" assert in LegalizeDAG.cpp.
+  if (VT.getSimpleVT() != MVT::i64)
+    return Op;
+  ISD::LoadExtType extType = LD->getExtensionType();
+
+  if (extType == ISD::SEXTLOAD && LD->hasNUsesOfValue(1, 0)) {
+    // Check if the only use is a truncation to the size of the loaded memory.
+    // In this case produce zext instead of sext. Note that the load chain
+    // has its own use.
+    SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
+    for (; UI != UE; ++UI) {
+      if (UI.getUse().getResNo() == 0) {
+        // User of a loaded value.
+        if (UI->getOpcode() == ISD::AND &&
+            isa<ConstantSDNode>(UI->getOperand(1))) {
+          EVT MemVT = LD->getMemoryVT();
+          uint64_t Mask = UI->getConstantOperandVal(1);
+          if ((MemVT == MVT::i8 && Mask == 0xFFul) ||
+              (MemVT == MVT::i16 && Mask == 0xFFFFul)) {
+            // The AND was not really needed. Produce a zextload, as it does
+            // not change the result and lets the AND node silently die.
+            extType = ISD::ZEXTLOAD;
+          }
+        }
+        break;
+      }
+    }
+  }
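The loop above is a small peephole: when the only real user of a sign-extending load masks the result back down to the loaded width, the choice of extension is unobservable, so the cheaper zero-extending form can be used. A compact sketch of just the masking test; the helper name is hypothetical and it assumes only i8 and i16 extloads reach this code, as in the patch:

    // Sketch only: true when Mask keeps exactly the bits produced by a memory
    // access of type MemVT, i.e. an AND with it hides the extension kind.
    static bool maskCoversLoadedBits(EVT MemVT, uint64_t Mask) {
      if (MemVT == MVT::i8)
        return Mask == 0xFFull;
      if (MemVT == MVT::i16)
        return Mask == 0xFFFFull;
      return false;
    }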
+  // Do extload into a 32-bit register, then extend that.
+  SDValue NewLD =
+      DAG.getExtLoad(extType, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
+                     MVT::i8, LD->getMemOperand());
+
+  SDValue Ops[] = {
+    DAG.getNode(ISD::getExtForLoadExtType(false, extType), dl, MVT::i64, NewLD),
+    NewLD.getValue(1)
+  };
+
+  // Replace the chain in all uses.
+  // XXX: Do we really need to do this?
+  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), NewLD.getValue(1));
+
+  return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue HSAILTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  // Since there are no 1-bit store operations, such stores are converted to
+  // 8-bit stores.
+  // First, sign extend to 32 bits, then use a truncating store to 8 bits.
+
+  SDLoc dl = SDLoc(Op);
+  StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  SDValue Value = ST->getValue();
+  MachineMemOperand *MMO = ST->getMemOperand();
+
+  assert(Value.getValueType() == MVT::i1 &&
+         "Custom lowering only for i1 stores");
+
+  Value = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Value);
+  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
+
+static SDValue getMemFenceImpl(SDValue Chain, SDLoc SL, unsigned MemoryOrder,
+                               unsigned GlobalMemoryScope,
+                               unsigned GroupMemoryScope,
+                               unsigned ImageMemoryScope,
+                               SelectionDAG &CurDAG) {
+  const SDValue Ops[] = {
+    Chain,
+    CurDAG.getTargetConstant(HSAILIntrinsic::HSAIL_memfence, SL, MVT::i64),
+    CurDAG.getConstant(MemoryOrder, SL, MVT::i32),
+    CurDAG.getConstant(GlobalMemoryScope, SL, MVT::i32),
+    CurDAG.getConstant(GroupMemoryScope, SL, MVT::i32),
+    CurDAG.getConstant(ImageMemoryScope, SL, MVT::i32)
+  };
+
+  return CurDAG.getNode(ISD::INTRINSIC_VOID, SL, MVT::Other, Ops);
+}
+
+static SDValue getMemFence(SDValue Chain, SDLoc SL, unsigned AS,
+                           unsigned MemoryOrder, unsigned MemoryScope,
+                           SelectionDAG &CurDAG) {
+  switch (AS) {
+  case HSAILAS::GLOBAL_ADDRESS:
+    return getMemFenceImpl(Chain, SL, MemoryOrder, MemoryScope,
+                           BRIG_MEMORY_SCOPE_NONE, BRIG_MEMORY_SCOPE_NONE,
+                           CurDAG);
+
+  case HSAILAS::GROUP_ADDRESS:
+    return getMemFenceImpl(Chain, SL, MemoryOrder, MemoryScope, MemoryScope,
+                           BRIG_MEMORY_SCOPE_NONE, CurDAG);
+
+  case HSAILAS::FLAT_ADDRESS:
+    return getMemFenceImpl(Chain, SL, MemoryOrder, MemoryScope,
+                           BRIG_MEMORY_SCOPE_WORKGROUP, BRIG_MEMORY_SCOPE_NONE,
+                           CurDAG);
+
+  default:
+    llvm_unreachable("unexpected memory segment");
+  }
+}
+
+SDValue HSAILTargetLowering::LowerATOMIC_LOAD(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  // HSAIL doesn't support SequentiallyConsistent, so lower an atomic load
+  // with SequentiallyConsistent memory order to a Release memfence followed
+  // by an Acquire atomic load.
+  AtomicSDNode *Node = cast<AtomicSDNode>(Op);
+
+  if (Node->getOrdering() != SequentiallyConsistent)
+    return Op;
+
+  unsigned brigMemoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE;
+  unsigned brigMemoryScope = Node->getAddressSpace() == HSAILAS::GROUP_ADDRESS
+                                 ?
BRIG_MEMORY_SCOPE_WORKGROUP
+                                 : BRIG_MEMORY_SCOPE_SYSTEM;
+
+  SDLoc SL(Op);
+
+  SDValue Chain = getMemFence(Op.getOperand(0), Op, Node->getAddressSpace(),
+                              brigMemoryOrder, brigMemoryScope, DAG);
+
+  return DAG.getAtomic(ISD::ATOMIC_LOAD, SL, Node->getMemoryVT(),
+                       Op.getValueType(), Chain, Node->getBasePtr(),
+                       Node->getMemOperand(), Acquire, Node->getSynchScope());
+}
+
+SDValue HSAILTargetLowering::LowerATOMIC_STORE(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  // HSAIL doesn't support SequentiallyConsistent, so lower an atomic store
+  // with SequentiallyConsistent memory order to a Release atomic store
+  // followed by an Acquire memfence.
+  AtomicSDNode *Node = cast<AtomicSDNode>(Op);
+
+  if (Node->getOrdering() != SequentiallyConsistent)
+    return Op;
+
+  unsigned MemoryOrder = BRIG_MEMORY_ORDER_SC_ACQUIRE;
+  unsigned MemoryScope = Node->getAddressSpace() == HSAILAS::GROUP_ADDRESS
+                             ? BRIG_MEMORY_SCOPE_WORKGROUP
+                             : BRIG_MEMORY_SCOPE_SYSTEM;
+
+  SDLoc SL(Op);
+
+  SDValue ResNode =
+      DAG.getAtomic(ISD::ATOMIC_STORE, SL, Node->getMemoryVT(),
+                    Node->getOperand(0), // Chain
+                    Node->getBasePtr(), Node->getVal(), Node->getMemOperand(),
+                    Release, Node->getSynchScope());
+  return getMemFence(ResNode, Op, Node->getAddressSpace(), MemoryOrder,
+                     MemoryScope, DAG);
+}
+
+//===--------------------------------------------------------------------===//
+bool HSAILTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+                                                const AddrMode &AM,
+                                                Type *Ty,
+                                                unsigned AddrSpace) const {
+  if (Subtarget->isGCN()) {
+    // Do not generate negative offsets as they cannot be folded into
+    // instructions.
+    if (AM.BaseOffs < 0 || AM.Scale < 0)
+      return false;
+  }
+
+  return TargetLowering::isLegalAddressingMode(DL, AM, Ty, AddrSpace);
+}
+
+bool HSAILTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
+  return false;
+}
+
+bool HSAILTargetLowering::isZExtFree(EVT VT1, EVT VT2) const { return false; }
+
+bool HSAILTargetLowering::isFAbsFree(EVT VT) const { return true; }
+
+bool HSAILTargetLowering::isFNegFree(EVT VT) const { return true; }
+
+bool HSAILTargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
+  // In HSAIL it is only profitable to go from a 64-bit type to a 32-bit type,
+  // but not to an 8- or 16-bit type.
+  return (VT1 == EVT(MVT::i64) && VT2 == EVT(MVT::i32)) ||
+         (VT1 == EVT(MVT::f64) && VT2 == EVT(MVT::f32));
+}
+
+bool HSAILTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  // HSAIL doesn't have any restrictions on this.
+  return true;
+}
+
+MVT HSAILTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
+                                                EVT LHSTy) const {
+  // Shift amounts in registers must be in S registers, so restrict the shift
+  // amount type to 32 bits.
+ return MVT::i32; +} + +bool HSAILTargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + return (SrcAS == HSAILAS::FLAT_ADDRESS && + DestAS == HSAILAS::GLOBAL_ADDRESS) || + (SrcAS == HSAILAS::GLOBAL_ADDRESS && DestAS == HSAILAS::FLAT_ADDRESS); +} + +void HSAILTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + const HSAILInstrInfo *TII = + static_cast(Subtarget->getInstrInfo()); + + if (TII->isInstAtomic(MI->getOpcode()) && !Node->hasAnyUseOfValue(0)) { + int NoRetAtomicOp = HSAIL::getAtomicNoRetOp(MI->getOpcode()); + if (NoRetAtomicOp != -1) { + MI->setDesc(TII->get(NoRetAtomicOp)); + MI->RemoveOperand(0); + } + + return; + } +} + +bool HSAILTargetLowering::isLoadBitCastBeneficial(EVT lVT, EVT bVT) const { + return !(lVT.getSizeInBits() == bVT.getSizeInBits() && + lVT.getScalarType().getSizeInBits() > + bVT.getScalarType().getSizeInBits() && + bVT.getScalarType().getSizeInBits() < 32 && + lVT.getScalarType().getSizeInBits() >= 32); +} + +bool HSAILTargetLowering::isVectorToScalarLoadStoreWidenBeneficial( + unsigned Width, EVT WidenVT, const MemSDNode *N) const { + unsigned WidenWidth = WidenVT.getSizeInBits(); + + // In HSAIL we have _v3 loads and stores, and in case of uneven vector size + // it is more effective to use one _v3 load instead of several _v1 loads + // For example for vector load of 3 integers: + // ld_v1_u64 + // ld_v1_u32 + // Is worse than: + // ld_v3_u32 + if ((Width * 4 / 3) == WidenWidth) + return false; + return true; +} Index: lib/Target/HSAIL/HSAILImages.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILImages.td @@ -0,0 +1,356 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// image operations + +let isImageInst=1 in { + +// Image read +class ReadImage1D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, $sampler, $coordWidth"), []>; + +class ReadImage2D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, $sampler, ( $coordWidth, $coordHeight )"), []>; + + +class ReadImage3D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordDepth), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, $sampler, ( $coordWidth, $coordHeight, $coordDepth )"), []>; + +class ReadImage2DDepth + : HSAILInst<(outs GPR32:$destR), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, + "( $destR ), " + "$image, $sampler, ( $coordWidth, $coordHeight )"), []>; + +class ReadImage2DArrayDepth + : HSAILInst<(outs GPR32:$destR), + (ins GPR64:$image, GPR64:$sampler, GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordArrayIndex), + !strconcat(asm, + "( $destR ), " + "$image, $sampler, ( $coordWidth, $coordHeight, $coordArrayIndex )"), []>; + +// read image 1d +def RDIMAGE : HSAILInstImage_Ld_1D<"rdimage", BrigOpcode.RDIMAGE>; + + +def rd_imgf_1d_s32 : ReadImage1D<"rdimage_v4_1d_f32_roimg_s32">; +def rd_imgf_1d_f32 : ReadImage1D<"rdimage_v4_1d_f32_roimg_f32">; +def rd_imgi_1d_s32 : ReadImage1D<"rdimage_v4_1d_s32_roimg_s32">; +def rd_imgi_1d_f32 : ReadImage1D<"rdimage_v4_1d_s32_roimg_f32">; +def rd_imgui_1d_s32 : ReadImage1D<"rdimage_v4_1d_u32_roimg_s32">; +def rd_imgui_1d_f32 : ReadImage1D<"rdimage_v4_1d_u32_roimg_f32">; + +// read image 1da + +def rd_imgf_1da_s32 : ReadImage2D<"rdimage_v4_1da_f32_roimg_s32">; +def rd_imgf_1da_f32 : ReadImage2D<"rdimage_v4_1da_f32_roimg_f32">; +def rd_imgi_1da_s32 : ReadImage2D<"rdimage_v4_1da_s32_roimg_s32">; +def rd_imgi_1da_f32 : ReadImage2D<"rdimage_v4_1da_s32_roimg_f32">; +def rd_imgui_1da_s32 : ReadImage2D<"rdimage_v4_1da_u32_roimg_s32">; +def rd_imgui_1da_f32 : ReadImage2D<"rdimage_v4_1da_u32_roimg_f32">; + +// read image 2d + +def rd_imgf_2d_s32 : ReadImage2D<"rdimage_v4_2d_f32_roimg_s32">; +def rd_imgf_2d_f32 : ReadImage2D<"rdimage_v4_2d_f32_roimg_f32">; +def rd_imgi_2d_s32 : ReadImage2D<"rdimage_v4_2d_s32_roimg_s32">; +def rd_imgi_2d_f32 : ReadImage2D<"rdimage_v4_2d_s32_roimg_f32">; +def rd_imgui_2d_s32 : ReadImage2D<"rdimage_v4_2d_u32_roimg_s32">; +def rd_imgui_2d_f32 : ReadImage2D<"rdimage_v4_2d_u32_roimg_f32">; + + +// read image 2da + +def rd_imgf_2da_s32 : ReadImage3D<"rdimage_v4_2da_f32_roimg_s32">; +def rd_imgf_2da_f32 : ReadImage3D<"rdimage_v4_2da_f32_roimg_f32">; +def rd_imgi_2da_s32 : ReadImage3D<"rdimage_v4_2da_s32_roimg_s32">; +def rd_imgi_2da_f32 : ReadImage3D<"rdimage_v4_2da_s32_roimg_f32">; +def rd_imgui_2da_s32 : ReadImage3D<"rdimage_v4_2da_u32_roimg_s32">; +def rd_imgui_2da_f32 : ReadImage3D<"rdimage_v4_2da_u32_roimg_f32">; + +// read image 3d + +def rd_imgf_3d_s32 : ReadImage3D<"rdimage_v4_3d_f32_roimg_s32">; +def rd_imgf_3d_f32 : ReadImage3D<"rdimage_v4_3d_f32_roimg_f32">; +def 
rd_imgi_3d_s32 : ReadImage3D<"rdimage_v4_3d_s32_roimg_s32">; +def rd_imgi_3d_f32 : ReadImage3D<"rdimage_v4_3d_s32_roimg_f32">; +def rd_imgui_3d_s32 : ReadImage3D<"rdimage_v4_3d_u32_roimg_s32">; +def rd_imgui_3d_f32 : ReadImage3D<"rdimage_v4_3d_u32_roimg_f32">; + +// OpenCL 2.0 rd2ddepth + +def rd_imgf_2ddepth_s32 : ReadImage2DDepth<"rdimage_2ddepth_f32_roimg_s32">; +def rd_imgf_2ddepth_f32 : ReadImage2DDepth<"rdimage_2ddepth_f32_roimg_f32">; + +// OpenCL 2.0 rd2dadepth + +def rd_imgf_2dadepth_s32 : ReadImage2DArrayDepth<"rdimage_2dadepth_f32_roimg_s32">; +def rd_imgf_2dadepth_f32 : ReadImage2DArrayDepth<"rdimage_2dadepth_f32_roimg_f32">; + + +// Image loads +class LoadImage1D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR32:$coordWidth), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, $coordWidth"), []>; + +class LoadImage2D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, ( $coordWidth, $coordHeight )"), []>; + +class LoadImage3D + : HSAILInst<(outs GPR32:$destR, GPR32:$destG, GPR32:$destB, GPR32:$destA), + (ins GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordDepth), + !strconcat(asm, + "( $destR, $destG, $destB, $destA ), " + "$image, ( $coordWidth, $coordHeight, $coordDepth )"), []>; + +class LoadImage2DDepth + : HSAILInst<(outs GPR32:$destR), + (ins GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, + "( $destR ), " + "$image, ( $coordWidth, $coordHeight )"), []>; + +class LoadImage2DArrayDepth + : HSAILInst<(outs GPR32:$destR), + (ins GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordArrayIndex), + !strconcat(asm, + "( $destR ), " + "$image, ( $coordWidth, $coordHeight, $coordArrayIndex )"), []>; + +// load image 1d + +def ld_imgf_1d_u32 : LoadImage1D<"ldimage_v4_1d_f32_rwimg_u32">; +def ld_imgi_1d_u32 : LoadImage1D<"ldimage_v4_1d_s32_rwimg_u32">; +def ld_imgui_1d_u32 : LoadImage1D<"ldimage_v4_1d_u32_rwimg_u32">; + +// load image 1d buffer + +def ld_imgf_1db_u32 : LoadImage1D<"ldimage_v4_1db_f32_rwimg_u32">; +def ld_imgi_1db_u32 : LoadImage1D<"ldimage_v4_1db_s32_rwimg_u32">; +def ld_imgui_1db_u32 : LoadImage1D<"ldimage_v4_1db_u32_rwimg_u32">; + +// load image 1d array + +def ld_imgf_1da_u32 : LoadImage2D<"ldimage_v4_1da_f32_rwimg_u32">; +def ld_imgi_1da_u32 : LoadImage2D<"ldimage_v4_1da_s32_rwimg_u32">; +def ld_imgui_1da_u32 : LoadImage2D<"ldimage_v4_1da_u32_rwimg_u32">; + +// load image 2d + +def ld_imgf_2d_u32 : LoadImage2D<"ldimage_v4_2d_f32_rwimg_u32">; +def ld_imgi_2d_u32 : LoadImage2D<"ldimage_v4_2d_s32_rwimg_u32">; +def ld_imgui_2d_u32 : LoadImage2D<"ldimage_v4_2d_u32_rwimg_u32">; + +// load image 2d array + +def ld_imgf_2da_u32 : LoadImage3D<"ldimage_v4_2da_f32_rwimg_u32">; +def ld_imgi_2da_u32 : LoadImage3D<"ldimage_v4_2da_s32_rwimg_u32">; +def ld_imgui_2da_u32 : LoadImage3D<"ldimage_v4_2da_u32_rwimg_u32">; + +// load image 3d + +def ld_imgf_3d_u32 : LoadImage3D<"ldimage_v4_3d_f32_rwimg_u32">; +def ld_imgi_3d_u32 : LoadImage3D<"ldimage_v4_3d_s32_rwimg_u32">; +def ld_imgui_3d_u32 : LoadImage3D<"ldimage_v4_3d_u32_rwimg_u32">; + +// load image 2ddepth +def ld_imgf_2ddepth_u32 : LoadImage2DDepth<"ldimage_2ddepth_f32_rwimg_u32">; + +// load image 2dadepth +def ld_imgf_2dadepth_u32 : LoadImage2DArrayDepth<"ldimage_2dadepth_f32_rwimg_u32">; + +// Image store +class StImage1d : HSAILInst<(outs), + 
(ins GPR32:$srcR, GPR32:$srcG, GPR32:$srcB, GPR32:$srcA, + GPR64:$image, GPR32:$coordWidth), + !strconcat(asm, "\t ( $srcR, $srcG, $srcB, $srcA ), $image, " + "$coordWidth"), + [(intr (ValTy GPR32:$srcR), (ValTy GPR32:$srcG), + (ValTy GPR32:$srcB), (ValTy GPR32:$srcA), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth))]>; + +class StImage2d : HSAILInst<(outs), + (ins GPR32:$srcR, GPR32:$srcG, GPR32:$srcB, GPR32:$srcA, + GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, "\t ( $srcR, $srcG, $srcB, $srcA ), $image, " + "( $coordWidth, $coordHeight )"), + [(intr (ValTy GPR32:$srcR), (ValTy GPR32:$srcG), + (ValTy GPR32:$srcB), (ValTy GPR32:$srcA), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth), (CoordTy GPR32:$coordHeight))]>; + +// FIXME: What is $src9? I don't see it in the spec. +class StImage3d : HSAILInst<(outs), + (ins GPR32:$srcR, GPR32:$srcG, GPR32:$srcB, GPR32:$srcA, + GPR64:$image, + GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordDepth, GPR32:$src9), + !strconcat(asm, "\t ( $srcR, $srcG, $srcB, $srcA ), $image, " + "( $coordWidth, $coordHeight, $coordDepth, $src9 )"), + [(intr (ValTy GPR32:$srcR), (ValTy GPR32:$srcG), + (ValTy GPR32:$srcB), (ValTy GPR32:$srcA), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth), (CoordTy GPR32:$coordHeight), + (CoordTy GPR32:$coordDepth), (CoordTy GPR32:$src9))]>; + +//image 2.0 2d depth +class StImage2dDepth : HSAILInst<(outs), + (ins GPR32:$srcR, + GPR64:$image, GPR32:$coordWidth, GPR32:$coordHeight), + !strconcat(asm, "\t $srcR, $image, " + "( $coordWidth, $coordHeight )"), + [(intr (ValTy GPR32:$srcR), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth), (CoordTy GPR32:$coordHeight))]>; + +// FIXME: What is $src6? I don't see it in the spec. +//image 2.0 2d array depth +class StImage2dArrayDepth : HSAILInst<(outs), + (ins GPR32:$srcR, + GPR64:$image, + GPR32:$coordWidth, GPR32:$coordHeight, GPR32:$coordArrayIndex, GPR32:$src6), + !strconcat(asm, "\t $srcR, $image, " + "( $coordWidth, $coordHeight, $coordArrayIndex, $src6 )"), + [(intr (ValTy GPR32:$srcR), + (i64 GPR64:$image), + (CoordTy GPR32:$coordWidth), (CoordTy GPR32:$coordHeight), + (CoordTy GPR32:$coordArrayIndex), (CoordTy GPR32:$src6))]>; + +// store image 1d +def stimagef_1d_i32 : StImage1d; +def stimagei_1d_i32 : StImage1d; +def stimageui_1d_i32 : StImage1d; + +// store image 1d array +def stimagef_1da_i32 : StImage2d; +def stimagei_1da_i32 : StImage2d; +def stimageui_1da_i32 : StImage2d; + +// store image 1d buffer +def stimagef_1db_i32 : StImage1d; +def stimagei_1db_i32 : StImage1d; +def stimageui_1db_i32 : StImage1d; + +// store image 2d +def stimagef_2d_i32 : StImage2d; +def stimagei_2d_i32 : StImage2d; +def stimageui_2d_i32 : StImage2d; + +// store image 2d array +def stimagef_2da_i32 : StImage3d; +def stimagei_2da_i32 : StImage3d; +def stimageui_2da_i32 : StImage3d; + +// store image 3d +def stimagef_3d_i32 : StImage3d; +def stimagei_3d_i32 : StImage3d; +def stimageui_3d_i32 : StImage3d; + +// store image 2d depth +def stimagef_2ddepth_i32 : StImage2dDepth; + +// Store image 2d array depth +def stimagef_2dadepth_i32 : StImage2dArrayDepth; + +} // isImageInst end + +// Query image +class QueryImage : HSAILInst< + (outs GPR32:$dest), (ins GPR64:$image), + !strconcat(asm, "\t $dest, $image"), + [(set (i32 GPR32:$dest), (intr (i64 GPR64:$image)))]>; + +def query_width_1d : QueryImage; +def query_width_1db: QueryImage; +def query_width_1da: QueryImage; +def query_width_2d : QueryImage; +def query_width_2da: QueryImage; +def query_width_3d : QueryImage; 
+def query_width_2ddepth : QueryImage; +def query_width_2dadepth : QueryImage; + + +def query_height_2d : QueryImage; +def query_height_2da: QueryImage; +def query_height_3d : QueryImage; +def query_height_2ddepth : QueryImage; +def query_height_2dadepth : QueryImage; + +def query_depth : QueryImage; + +def query_format_1d : QueryImage; +def query_format_1db: QueryImage; +def query_format_1da: QueryImage; +def query_format_2d : QueryImage; +def query_format_2da: QueryImage; +def query_format_3d : QueryImage; + +def query_order_1d : QueryImage; +def query_order_1db: QueryImage; +def query_order_1da: QueryImage; +def query_order_2d : QueryImage; +def query_order_2da: QueryImage; +def query_order_3d : QueryImage; + +def query_array_1da: QueryImage; +def query_array_2da: QueryImage; +def query_array_2dadepth : QueryImage; + +def query_channelorder_2ddepth : QueryImage; +def query_channelorder_2dadepth : QueryImage; + +def query_channeltype_2ddepth : QueryImage; +def query_channeltype_2dadepth : QueryImage; + Index: lib/Target/HSAIL/HSAILInstrFormats.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstrFormats.td @@ -0,0 +1,1758 @@ +//===------ HSAILInstrFormats.td ---------------------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +// TODO HSA : fix so that a space is not emitted prior to end of +// statement semi-colon + +class HSAILInst pattern, + bits<16> BrigVal = 0, + bit appendSemicolon = 1> + : Instruction { + field bits<32> Inst; + let Namespace = "HSAIL"; + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = !if(appendSemicolon, !strconcat(asmstr, ";"), asmstr); + let Pattern = pattern; + + let UseNamedOperandTable = 1; + let Inst{15-0} = BrigVal; + + // Target specific flags + // Important! Keep in sync with HSAIL.h::HSAILTSFLAG + + // Instruction classes. See BrigKind. + bit InstAddr = 0; + bit InstAtomic = 0; + bit InstBasic = 0; + bit InstBr = 0; + bit InstCmp = 0; + bit InstCvt = 0; + bit InstImage = 0; + bit InstLane = 0; + bit InstMem = 0; + bit InstMemFence = 0; + bit InstMod = 0; + bit InstQueryImage = 0; + bit InstQuerySampler = 0; + bit InstQueue = 0; + bit InstSeg = 0; + bit InstSegCvt = 0; + bit InstSignal = 0; + bit InstSourceType = 0; + + // FIXME: Remove these + bit isConv = 0; + bit isImageInst = 0; + + bits<2> RoundAttr = 0; + + // Most instructions with a width modifier default to 1. 
+ bits<2> WidthAttr = WidthAttrValues.ONE; + + bit HasDefaultSegment = 0; + + let TSFlags{3} = InstAddr; + let TSFlags{4} = InstAtomic; + let TSFlags{5} = InstBasic; + let TSFlags{6} = InstBr; + let TSFlags{7} = InstCmp; + let TSFlags{8} = InstCvt; + let TSFlags{9} = InstImage; + let TSFlags{10} = InstLane; + let TSFlags{11} = InstMem; + let TSFlags{12} = InstMemFence; + let TSFlags{13} = InstMod; + let TSFlags{14} = InstQueryImage; + let TSFlags{15} = InstQuerySampler; + let TSFlags{16} = InstQueue; + let TSFlags{17} = InstSeg; + let TSFlags{18} = InstSegCvt; + let TSFlags{19} = InstSignal; + let TSFlags{20} = InstSourceType; + + let TSFlags{23} = isConv; + let TSFlags{24} = isImageInst; + + let TSFlags{26-25} = RoundAttr; + let TSFlags{28-27} = WidthAttr; + + let TSFlags{29} = HasDefaultSegment; + + // Store the opcode here because we want access to it in + // BRIGAsmPrinter, but can't really use the normal MC binary + // encoding method to do so. + let TSFlags{63-48} = BrigVal; +} + +//////////////////////////////////////////////////////////////////////////////// +// Basic instruction formats +//////////////////////////////////////////////////////////////////////////////// +// InstBasic +class HSAILInstBasic BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstBasic = 1; +} + +class HSAILInstBasic_0Op_NoRet BrigVal, HSAILProfile P> : + HSAILInstBasic { + let hasSideEffects = 1; +} + +class HSAILInstBasic_0Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +multiclass InstBasic_0Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstBasic_0Op; + def _U64 : HSAILInstBasic_0Op; +} + +class HSAILInstBasic_1Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +multiclass InstBasic_1Op_FPTypes BrigVal> { + def _F32 : HSAILInstBasic_1Op; + def _F64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_SignedIntTypes BrigVal> { + def _S32 : HSAILInstBasic_1Op; + def _S64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstBasic_1Op; + def _U64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_BitTypes BrigVal> { + def _B1 : HSAILInstBasic_1Op; + def _B32 : HSAILInstBasic_1Op; + def _B64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_SF BrigVal> { + def _F32 : HSAILInstBasic_1Op; + def _F64 : HSAILInstBasic_1Op; + + def _S32 : HSAILInstBasic_1Op; + def _S64 : HSAILInstBasic_1Op; +} + +multiclass InstBasic_1Op_BF BrigVal> { + def _F32 : HSAILInstBasic_1Op; + def _F64 : HSAILInstBasic_1Op; + + def _B1 : HSAILInstBasic_1Op; + def _B32 : HSAILInstBasic_1Op; + def _B64 : HSAILInstBasic_1Op; +} + +// Instructions like gridsize which always use an i32 immediate source +multiclass InstBasic_1Op_U32ImmSrc BrigVal> { + def _U32 : HSAILInstBasic_1Op; + def _U64 : HSAILInstBasic_1Op; +} + +class HSAILInstBasic_2Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +multiclass InstBasic_2Op_FPTypes BrigVal> { + def _F32 : HSAILInstBasic_2Op; + def _F64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_SignedIntTypes BrigVal> { + def _S32 : HSAILInstBasic_2Op; + def _S64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstBasic_2Op; + def _U64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_BitTypes BrigVal> { + def _B1 : HSAILInstBasic_2Op; + def _B32 : HSAILInstBasic_2Op; + def _B64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_IntTypes BrigVal> { + def _S32 : HSAILInstBasic_2Op; + def _S64 : HSAILInstBasic_2Op; + def _U32 : HSAILInstBasic_2Op; + def 
_U64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_ShiftTypes BrigVal> { + def _S32 : HSAILInstBasic_2Op; + def _S64 : HSAILInstBasic_2Op; + def _U32 : HSAILInstBasic_2Op; + def _U64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_LdExp BrigVal> { + def _F32 : HSAILInstBasic_2Op; + def _F64 : HSAILInstBasic_2Op; +} + +multiclass InstBasic_2Op_BitRetType_U32_U32 BrigVal> { + def _B32 : HSAILInstBasic_2Op; + def _B64 : HSAILInstBasic_2Op; +} + +class HSAILInstBasic_3Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +class HSAILInstBasic_4Op BrigVal, HSAILProfile P> : + HSAILInstBasic; + +multiclass InstBasic_3Op_FPTypes BrigVal> { + def _F32 : HSAILInstBasic_3Op; + def _F64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_IntTypes BrigVal> { + def _S32 : HSAILInstBasic_3Op; + def _S64 : HSAILInstBasic_3Op; + def _U32 : HSAILInstBasic_3Op; + def _U64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_BitTypes BrigVal> { + def _B1 : HSAILInstBasic_3Op; + def _B32 : HSAILInstBasic_3Op; + def _B64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_CMov BrigVal> { + def _B1 : HSAILInstBasic_3Op; + def _B32 : HSAILInstBasic_3Op; + def _B64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_3Op_BitExtract BrigVal> { + def _S32 : HSAILInstBasic_3Op; + def _S64 : HSAILInstBasic_3Op; + + def _U32 : HSAILInstBasic_3Op; + def _U64 : HSAILInstBasic_3Op; +} + +multiclass InstBasic_4Op_BitInsert BrigVal> { + def _S32 : HSAILInstBasic_4Op; + def _S64 : HSAILInstBasic_4Op; + + def _U32 : HSAILInstBasic_4Op; + def _U64 : HSAILInstBasic_4Op; +} + +multiclass InstBasic_3Op_SUF BrigVal> { + def _S32 : HSAILInstBasic_3Op; + def _S64 : HSAILInstBasic_3Op; + + def _U32 : HSAILInstBasic_3Op; + def _U64 : HSAILInstBasic_3Op; + + def _F32 : HSAILInstBasic_3Op; + def _F64 : HSAILInstBasic_3Op; +} + + +// InstMod +class HSAILInstMod BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstMod = 1; +} + +// TODO: Needs pack operand? 
+class HSAILInstMod_1Op BrigVal, HSAILProfile P> : + HSAILInstMod; + +multiclass InstMod_1Op_FPTypes BrigVal> { + def _F32 : HSAILInstMod_1Op; + def _F64 : HSAILInstMod_1Op; +} + +multiclass InstMod_1Op_SignedIntTypes BrigVal> { + def _S32 : HSAILInstMod_1Op; + def _S64 : HSAILInstMod_1Op; +} + +multiclass InstMod_1Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstMod_1Op; + def _U64 : HSAILInstMod_1Op; +} + +multiclass InstMod_1Op_1BitTypes BrigVal> { + def _B32 : HSAILInstMod_1Op; + def _B64 : HSAILInstMod_1Op; +} + +multiclass InstMod_1Op_IntTypes BrigVal> { + def _S32 : HSAILInstMod_1Op; + def _S64 : HSAILInstMod_1Op; + def _U32 : HSAILInstMod_1Op; + def _U64 : HSAILInstMod_1Op; +} + +class HSAILInstMod_2Op BrigVal, HSAILProfile P> : + HSAILInstMod; + +multiclass InstMod_2Op_FPTypes BrigVal> { + def _F32 : HSAILInstMod_2Op; + def _F64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_SignedIntTypes BrigVal> { + def _S32 : HSAILInstMod_2Op; + def _S64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_UnsignedIntTypes BrigVal> { + def _U32 : HSAILInstMod_2Op; + def _U64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_BitTypes BrigVal> { + def _B32 : HSAILInstMod_2Op; + def _B64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_IntTypes BrigVal> { + def _S32 : HSAILInstMod_2Op; + def _S64 : HSAILInstMod_2Op; + def _U32 : HSAILInstMod_2Op; + def _U64 : HSAILInstMod_2Op; +} + +multiclass InstMod_2Op_SUF BrigVal> { + def _S32 : HSAILInstMod_2Op; + def _S64 : HSAILInstMod_2Op; + + def _U32 : HSAILInstMod_2Op; + def _U64 : HSAILInstMod_2Op; + + def _F32 : HSAILInstMod_2Op; + def _F64 : HSAILInstMod_2Op; +} + + +class HSAILInstMod_3Op BrigVal, HSAILProfile P> : + HSAILInstMod; + +multiclass InstMod_3Op_FPTypes BrigVal> { + def _F32 : HSAILInstMod_3Op; + def _F64 : HSAILInstMod_3Op; +} + + +// InstCvt +class InstCvt BrigVal, + RegisterClass DestRC, + ValueType SrcVT> : HSAILInst< + (outs DestRC:$dest), + (ins ftz:$ftz, + BrigRound:$round, + BrigType:$destTypedestLength, + BrigType:$sourceType, + getRegClassForVT.ret:$src), + opName#"$ftz$round$destTypedestLength$sourceType\t$dest, $src", + [], BrigVal +> { + let isConv = 1; + let InstCvt = 1; +} + +// This also generates the illegal type to same type cvts, which +// hopefully will never be used. 
+multiclass InstCvt_SrcTypes BrigVal, + RegisterClass DestRC> { + def _B1 : InstCvt; + + def _U32 : InstCvt; + def _U64 : InstCvt; + + def _S32 : InstCvt; + def _S64 : InstCvt; + + def _F16 : InstCvt; + def _F32 : InstCvt; + def _F64 : InstCvt; +} + +multiclass InstCvt_DestTypes BrigVal> { + defm _B1 : InstCvt_SrcTypes; + + defm _U32 : InstCvt_SrcTypes; + defm _U64 : InstCvt_SrcTypes; + + defm _S32 : InstCvt_SrcTypes; + defm _S64 : InstCvt_SrcTypes; + + defm _F16 : InstCvt_SrcTypes; + defm _F32 : InstCvt_SrcTypes; + defm _F64 : InstCvt_SrcTypes; +} + +// InstSourceType +class HSAILInstSourceType BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstSourceType = 1; +} + +class HSAILInstSourceType_1Op BrigVal, HSAILProfile P> : + HSAILInstSourceType; + +multiclass InstSourceType_1Op_U32_BitTypes BrigVal> { + def _U32_B32 : HSAILInstSourceType_1Op; + def _U32_B64 : HSAILInstSourceType_1Op; +} + +multiclass InstSourceType_1Op_U32_IntTypes BrigVal> { + def _U32_S32 : HSAILInstSourceType_1Op; + def _U32_S64 : HSAILInstSourceType_1Op; + + def _U32_U32 : HSAILInstSourceType_1Op; + def _U32_U64 : HSAILInstSourceType_1Op; +} + +class HSAILInstSourceType_2Op BrigVal, HSAILProfile P> : + HSAILInstSourceType; + +multiclass InstSourceType_2Op_Class_Types BrigVal> { + def _F32 : HSAILInstSourceType_2Op; + def _F64 : HSAILInstSourceType_2Op; +} + +class HSAILInstSourceType_3Op BrigVal, HSAILProfile P> : + HSAILInstSourceType; + +// Missing types that require q registers. +multiclass InstSourceType_3Op_Pack_Types BrigVal> { + def _U8X4_U32 : HSAILInstSourceType_3Op; + def _S8X4_S32 : HSAILInstSourceType_3Op; + + def _U8X8_U32 : HSAILInstSourceType_3Op; + def _S8X8_S32 : HSAILInstSourceType_3Op; + + def _U16X4_U32 : HSAILInstSourceType_3Op; + def _S16X4_S32 : HSAILInstSourceType_3Op; + + def _U32X2_U32 : HSAILInstSourceType_3Op; + def _S32X2_S32 : HSAILInstSourceType_3Op; + + def _F16X2_F16 : HSAILInstSourceType_3Op; + def _F16X4_F16 : HSAILInstSourceType_3Op; +} + +multiclass InstSourceType_3Op_Sad_Types BrigVal> { + def _U32_U32 : HSAILInstSourceType_3Op; + def _U32_U16X2 : HSAILInstSourceType_3Op; + def _U32_U8X4 : HSAILInstSourceType_3Op; +} + +class HSAILInstSourceType_4Op BrigVal, HSAILProfile P> : + HSAILInstSourceType; + + + +// InstLane +class HSAILInstLane BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 1; + let InstLane = 1; +} + +class HSAILInstLane_0Op BrigVal, HSAILProfile P> : + HSAILInstLane< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigWidth:$width, BrigType:$TypeLength, BrigType:$sourceType), + opName#"$width$TypeLength$sourceType\t$dest" +>; + +class HSAILInstLane_1Op BrigVal, HSAILProfile P> : + HSAILInstLane< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigWidth:$width, P.Src0RC:$src0, BrigType:$TypeLength, BrigType:$sourceType), + opName#"$width$TypeLength$sourceType\t$dest, $src0" +>; + +class HSAILInstLane_ActiveLanePermute BrigVal, HSAILProfile P> : + HSAILInstLane< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigWidth:$width, + P.Src0RC:$src0, P.Src1RC:$src1, P.Src2RC:$src2, P.Src3RC:$src3, + BrigType:$TypeLength, BrigType:$sourceType), + opName#"$width$TypeLength$sourceType\t$dest, $src0, $src1, $src2, $src3" +>; + +multiclass InstLane_ActiveLanePermute_Types BrigVal> { + def _B1 : HSAILInstLane_ActiveLanePermute; + def _B32 : HSAILInstLane_ActiveLanePermute; + def _B64 : HSAILInstLane_ActiveLanePermute; +} + +class HSAILInstLane_ActiveLaneMask BrigVal> : + HSAILInstLane< + BrigVal, + // FIXME: Using 
compound operands as dest seems to not be selectable + //(outs Vec4Op:$dest), + (outs GPR64:$dest0, GPR64:$dest1, GPR64:$dest2, GPR64:$dest3), + (ins BrigWidth:$width, + B1Op:$src0, BrigType:$TypeLength, BrigType:$sourceType), +// opName#"$width$TypeLength$sourceType\t$dest, $src0" + opName#"_v4$width$TypeLength$sourceType\t($dest0, $dest1, $dest2, $dest3), $src0" +>; + +// InstBr +class HSAILInstBr BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 1; + let InstBr = 1; +} + +class HSAILInstBr_0Op_NoRet BrigVal> : + HSAILInstBr; + +class HSAILInstBr_1Op_NoRet BrigVal> : + HSAILInstBr; + +class HSAILInstBr_2Op_NoRet BrigVal> : + HSAILInstBr; + +// InstSeg +class HSAILInstSeg BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstSeg = 1; +} + +class HSAILInstSeg_0Op BrigVal, HSAILProfile P> : + HSAILInstSeg; + +multiclass InstSeg_0Op_PtrTypes BrigVal> { + def _U32 : HSAILInstSeg_0Op; + def _U64 : HSAILInstSeg_0Op; +} + +// InstSegCvt +class HSAILInstSegCvt BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstSegCvt = 1; +} + +class HSAILInstSegCvt_1Op BrigVal, HSAILProfile P> : + HSAILInstSegCvt; + +multiclass InstSegCvt_1Op_PtrTypes BrigVal> { + def _U32_U32 : HSAILInstSegCvt_1Op; + def _U32_U64 : HSAILInstSegCvt_1Op; + def _U64_U32 : HSAILInstSegCvt_1Op; + def _U64_U64 : HSAILInstSegCvt_1Op; +} + +multiclass InstSegCvt_1Op_Segmentp_Types BrigVal> { + def _B1_U32 : HSAILInstSegCvt_1Op; + def _B1_U64 : HSAILInstSegCvt_1Op; +} + +// InstMemFence +class HSAILInstMemFence BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 1; + let mayLoad = 1; + let mayStore = 1; + let isBarrier = 1; + let isNotDuplicable = 1; // XXX - This seems unnecessary + let InstMemFence = 1; +} + +class InstMemFence BrigVal> : + HSAILInstMemFence< + BrigVal, + (outs), + (ins BrigMemoryOrder:$order, + BrigMemoryScope:$scope), + opName#"$order$scope" +>; + +// InstMem +class HSAILInstMem BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let mayLoad = 1; + let mayStore = 1; + let InstMem = 1; +} + +class LD BrigVal, dag outs> : + HSAILInstMem { + let mayStore = 0; + let hasSideEffects = 0; +} + +class StoreOperands { + field dag op_ins = (ins srcOp:$src, MEMOP:$address, BrigType:$TypeLength, + BrigSegment:$segment, BrigAlignment:$align); +} + +def store_u32 : StoreOperands; +def store_f32 : StoreOperands; +def store_u64 : StoreOperands; +def store_f64 : StoreOperands; + +def store_v2_u32 : StoreOperands; +def store_v2_f32 : StoreOperands; + +def store_v2_u64 : StoreOperands; +def store_v2_f64 : StoreOperands; + +def store_v3_u32 : StoreOperands; +def store_v3_f32 : StoreOperands; + +def store_v3_u64 : StoreOperands; +def store_v3_f64 : StoreOperands; + +def store_v4_u32 : StoreOperands; +def store_v4_f32 : StoreOperands; +def store_v4_u64 : StoreOperands; +def store_v4_f64 : StoreOperands; + + +multiclass LD_Types BrigVal> { + def _S8 : LD, + LdStVectorMap; + def _U8 : LD, + LdStVectorMap; + + def _S16 : LD, + LdStVectorMap; + def _U16 : LD, + LdStVectorMap; + + def _S32 : LD, + LdStVectorMap; + def _U32 : LD, + LdStVectorMap; + def _F32 : LD, + LdStVectorMap; + + def _SAMP : LD, + LdStVectorMap; + + def _S64 : LD, + LdStVectorMap; + def _U64 : LD, + LdStVectorMap; + def _F64 : LD, + LdStVectorMap; + + + def _V2_S8 : LD, + LdStVectorMap; + def _V2_U8 : LD, + LdStVectorMap; + + def _V2_S16 : LD, + LdStVectorMap; + def _V2_U16 : LD, + LdStVectorMap; + + def 
_V2_S32 : LD, + LdStVectorMap; + def _V2_U32 : LD, + LdStVectorMap; + def _V2_F32 : LD, + LdStVectorMap; + + def _V2_S64 : LD, + LdStVectorMap; + def _V2_U64 : LD, + LdStVectorMap; + def _V2_F64 : LD, + LdStVectorMap; + + + def _V3_S8 : LD, + LdStVectorMap; + def _V3_U8 : LD, + LdStVectorMap; + + def _V3_S16 : LD, + LdStVectorMap; + def _V3_U16 : LD, + LdStVectorMap; + + def _V3_S32 : LD, + LdStVectorMap; + def _V3_U32 : LD, + LdStVectorMap; + def _V3_F32 : LD, + LdStVectorMap; + + def _V3_S64 : LD, + LdStVectorMap; + def _V3_U64 : LD, + LdStVectorMap; + def _V3_F64 : LD, + LdStVectorMap; + + + def _V4_S8 : LD, + LdStVectorMap; + def _V4_U8 : LD, + LdStVectorMap; + + def _V4_S16 : LD, + LdStVectorMap; + def _V4_U16 : LD, + LdStVectorMap; + + def _V4_S32 : LD, + LdStVectorMap; + def _V4_U32 : LD, + LdStVectorMap; + def _V4_F32 : LD, + LdStVectorMap; + + def _V4_S64 : LD, + LdStVectorMap; + def _V4_U64 : LD, + LdStVectorMap; + def _V4_F64 : LD, + LdStVectorMap; +} + +class ST BrigVal, dag ins> : + HSAILInstMem { + let mayLoad = 0; + let hasSideEffects = 0; +} + +multiclass ST_Types BrigVal> { + def _U8 : ST, + LdStVectorMap; + + def _U16 : ST, + LdStVectorMap; + + def _U32 : ST, + LdStVectorMap; + def _F32 : ST, + LdStVectorMap; + + def _U64 : ST, + LdStVectorMap; + def _F64 : ST, + LdStVectorMap; + + + def _V2_U8 : ST, + LdStVectorMap; + def _V2_U16 : ST, + LdStVectorMap; + + def _V2_U32 : ST, + LdStVectorMap; + def _V2_F32 : ST, + LdStVectorMap; + + def _V2_U64 : ST, + LdStVectorMap; + def _V2_F64 : ST, + LdStVectorMap; + + + def _V3_U8 : ST, + LdStVectorMap; + def _V3_U16 : ST, + LdStVectorMap; + + def _V3_U32 : ST, + LdStVectorMap; + def _V3_F32 : ST, + LdStVectorMap; + + def _V3_U64 : ST, + LdStVectorMap; + def _V3_F64 : ST, + LdStVectorMap; + + + def _V4_U8 : ST, + LdStVectorMap; + + def _V4_U16 : ST, + LdStVectorMap; + + def _V4_U32 : ST, + LdStVectorMap; + def _V4_F32 : ST, + LdStVectorMap; + + def _V4_U64 : ST, + LdStVectorMap; + def _V4_F64 : ST, + LdStVectorMap; +} + +// InstAtomic +class HSAILInstAtomic BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 1; + let mayLoad = 1; + let mayStore = 1; + let InstAtomic = 1; + + int atomicOperation = ?; +} + +class InstAtomic_0Op BrigVal, + int opVal, HSAILProfile P, string noRetOp = ""> : + HSAILInstAtomic< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$dest, $address" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +class InstAtomic_1Op BrigVal, + int opVal, HSAILProfile P, string noRetOp = ""> : + HSAILInstAtomic< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + P.Src0RC:$src0, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$dest, $address, $src0" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +class InstAtomic_2Op BrigVal, + int opVal, HSAILProfile P, string noRetOp = ""> : + HSAILInstAtomic< + BrigVal, + (outs P.DestRC:$dest), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + P.Src0RC:$src0, + P.Src1RC:$src1, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$dest, $address, $src0, $src1" +>, AtomicNoRet { + let 
atomicOperation = opVal; +} + +multiclass InstAtomic_0Op_BitTypes BrigVal, + int opVal, string noRetOp = ""> { + def _B32 : InstAtomic_0Op; + def _B64 : InstAtomic_0Op; +} + +multiclass InstAtomic_1Op_BitTypes BrigVal, + int opVal, string noRetOp = ""> { + def _B32 : InstAtomic_1Op; + def _B64 : InstAtomic_1Op; +} + +multiclass InstAtomic_2Op_BitTypes BrigVal, int opVal, string noRetOp = ""> { + def _B32 : InstAtomic_2Op; + def _B64 : InstAtomic_2Op; +} + +multiclass InstAtomic_1Op_IntTypes BrigVal, int opVal, string noRetOp = ""> { + def _S32 : InstAtomic_1Op; + def _S64 : InstAtomic_1Op; + + def _U32 : InstAtomic_1Op; + def _U64 : InstAtomic_1Op; +} + +class InstAtomic_0Op_NoRet BrigVal, int opVal, + HSAILProfile P, string typeSuffix> : + HSAILInstAtomic< + BrigVal, + (outs), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$address" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +class InstAtomic_1Op_NoRet BrigVal, + int opVal, HSAILProfile P, + string typeSuffix> : + HSAILInstAtomic< + BrigVal, + (outs), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + P.Src0RC:$src0, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$address, $src0" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +class InstAtomic_2Op_NoRet BrigVal, int opVal, + HSAILProfile P, string typeSuffix> : + HSAILInstAtomic< + BrigVal, + (outs), + (ins BrigAtomicOperation:$op, + AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address, + P.Src0RC:$src0, + P.Src1RC:$src1, + BrigType:$TypeLength), + opName#"$segment$order$scope$equiv$TypeLength\t$address, $src0, $src1" +>, AtomicNoRet { + let atomicOperation = opVal; +} + +multiclass InstAtomic_1Op_NoRet_BitTypes BrigVal, int opVal> { + def _B32 : InstAtomic_1Op_NoRet; + def _B64 : InstAtomic_1Op_NoRet; +} + +multiclass InstAtomic_1Op_NoRet_IntTypes BrigVal, int opVal> { + def _S32 : InstAtomic_1Op_NoRet; + def _S64 : InstAtomic_1Op_NoRet; + + def _U32 : InstAtomic_1Op_NoRet; + def _U64 : InstAtomic_1Op_NoRet; +} + +// InstCmp +class HSAILInstCmp BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstCmp = 1; + let isCompare = 1; +} + +class InstCmp BrigVal, HSAILProfile P> : + HSAILInstCmp; + +multiclass InstCmp_CmpTypes BrigVal, BRIGType destTy> { + def _B1 : InstCmp("Inst"#destTy.InstName#"_B1_B1")>; + def _S32 : InstCmp("Inst"#destTy.InstName#"_S32_S32")>; + def _S64 : InstCmp("Inst"#destTy.InstName#"_S64_S64")>; + def _U32 : InstCmp("Inst"#destTy.InstName#"_U32_U32")>; + def _U64 : InstCmp("Inst"#destTy.InstName#"_U64_U64")>; + def _F32 : InstCmp("Inst"#destTy.InstName#"_F32_F32")>; + def _F64 : InstCmp("Inst"#destTy.InstName#"_F64_F64")>; +} + +// TODO: Support for other return types. 
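+// For now only the b1 destination is instantiated; the commented-out defm
+// lines in the multiclass below sketch the intended expansion.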
+multiclass InstCmp_RetTypes BrigVal> { + defm _B1 : InstCmp_CmpTypes; + // defm _S32 : InstCmp_CmpTypes; + // defm _U32 : InstCmp_CmpTypes; + // defm _S64 : InstCmp_CmpTypes; + // defm _U64 : InstCmp_CmpTypes; + // defm _F32 : InstCmp_CmpTypes; + // defm _F64 : InstCmp_CmpTypes; +} + + +// InstAddr +class HSAILInstAddr BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let hasSideEffects = 0; + let InstAddr = 1; +} + +class HSAILInstAddr_1Op BrigVal, HSAILProfile P> : + HSAILInstAddr; + +multiclass InstAddr_1Op_PtrTypes BrigVal> { + def _U32 : HSAILInstAddr_1Op; + def _U64 : HSAILInstAddr_1Op; +} + +// InstImage +class HSAILInstImage BrigVal, + dag outs, dag ins, string asmstr> : + HSAILInst { + let mayLoad = 1; + let mayStore = 1; + let InstImage = 1; +} + +class HSAILInstImage_Ld_1D BrigVal> : + HSAILInstImage { + let mayStore = 0; +} + +//////////////////////////////////////////////////////////////////////////////// +// Basic instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstBasic_0Op_Pat : Pat < + (vt (node)), + (inst bt) +>; + +class InstBasic_0Op_NoRet_Pat : Pat < + (node), + (inst BrigType.NONE) +>; + +class InstBasic_1Op_Pat : Pat < + (vt (node (vt (GPROrImm vt:$src0)))), + (inst $src0, bt) +>; + +// 1 operand is always i32 immediate +class GridSize_Pat : Pat < + (vt (node i32:$src0)), + (inst imm:$src0, bt) +>; + +multiclass InstBasic_1Op_IntTypes_Pat { + def : InstBasic_1Op_Pat(inst#!if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_1Op_Pat(inst#!if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +multiclass InstBasic_1Op_BitTypes_Pat { + def : InstBasic_1Op_Pat(inst#"_B32"), node, i32, + BrigType.B32 + >; + + def : InstBasic_1Op_Pat(inst#"_B64"), node, i64, + BrigType.B64 + >; + + def : InstBasic_1Op_Pat(inst#"_B1"), node, i1, + BrigType.B1 + >; +} + +multiclass InstBasic_1Op_FPTypes_Pat { + def : InstBasic_1Op_Pat(inst#"_F32"), node, f32, BrigType.F32>; + def : InstBasic_1Op_Pat(inst#"_F64"), node, f64, BrigType.F64>; +} + +class InstBasic_2Op_Pat : Pat < + (vt0 (node (GPROrImm vt0:$src0), (vt1 (GPROrImm vt1:$src1)))), + (inst $src0, $src1, bt) +>; + +class InstBasic_2Op_RetType_Pat : Pat < + (destvt (node (GPROrImm vt0:$src0), (vt1 (GPROrImm vt1:$src1)))), + (inst $src0, $src1, bt) +>; + +multiclass InstBasic_2Op_BitRetType_U32_U32_Pat { + def : InstBasic_2Op_RetType_Pat(inst#"_B32"), + node, i32, i32, i32, BrigType.B32 + >; + + def : InstBasic_2Op_RetType_Pat(inst#"_B64"), + node, i64, i32, i32, BrigType.B64 + >; +} + +multiclass InstBasic_2Op_IntTypes_Pat { + def : InstBasic_2Op_Pat(inst#!if(signed, "_S32", "_U32")), node, i32, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_2Op_Pat(inst#!if(signed, "_S64", "_U64")), node, i64, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +multiclass InstBasic_2Op_BitIntTypes_Pat { + def : InstBasic_2Op_Pat(inst#"_B32"), node, i32, i32, + BrigType.B32 + >; + + def : InstBasic_2Op_Pat(inst#"_B64"), node, i64, i64, + BrigType.B64 + >; + + def : InstBasic_2Op_Pat(inst#"_B1"), node, i1, i1, + BrigType.B1 + >; +} + +multiclass InstBasic_2Op_FPTypes_Pat { + def : InstBasic_2Op_Pat; + def : InstBasic_2Op_Pat; +} + +class InstBasic_3Op_Pat : Pat < + (node (vt (GPROrImm vt:$src0)), (vt (GPROrImm vt:$src1)), (vt (GPROrImm vt:$src2))), + (inst $src0, $src1, $src2, bt) +>; +multiclass InstBasic_3Op_IntTypes_Pat { + def : InstBasic_3Op_Pat(inst# 
!if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_3Op_Pat(inst# !if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +// XXX - b1 missing intentionally with current set of instructions. +multiclass InstBasic_3Op_BitTypes_Pat { + def : InstBasic_3Op_Pat(inst#"_B32"), node, i32, BrigType.B32>; + def : InstBasic_3Op_Pat(inst#"_B64"), node, i64, BrigType.B64>; +} + +multiclass InstBasic_3Op_FPTypes_Pat { + def : InstBasic_3Op_Pat(inst#"_F32"), node, f32, BrigType.F32>; + def : InstBasic_3Op_Pat(inst#"_F64"), node, f64, BrigType.F64>; +} + +class InstBasic_CMov_Pat : Pat < + (select i1:$src0, (GPROrImm vt:$src1), (GPROrImm vt:$src2)), + (vt (inst $src0, $src1, $src2, bt)) +>; + +class InstBasic_3Op_BitExtract_Pat : Pat < + (node (vt (GPROrImm vt:$src0)), (i32 (GPROrImm i32:$src1)), (i32 (GPROrImm i32:$src2))), + (inst $src0, $src1, $src2, bt) +>; + +multiclass InstBasic_3Op_BitExtract_IntTypes_Pat { + def : InstBasic_3Op_BitExtract_Pat(inst# !if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_3Op_BitExtract_Pat(inst# !if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +class InstBasic_4Op_BitInsert_Pat : Pat < + (node (vt (GPROrImm vt:$src0)), + (vt (GPROrImm vt:$src1)), + (i32 (GPROrImm i32:$src2)), + (i32 (GPROrImm i32:$src3))), + (inst $src0, $src1, $src2, $src3, bt) +>; + +multiclass InstBasic_4Op_BitInsert_IntTypes_Pat { + def : InstBasic_4Op_BitInsert_Pat(inst# !if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstBasic_4Op_BitInsert_Pat(inst# !if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// Mod instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstMod_1Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0))), + (inst ftz, round, $src0, bt) +>; + +class InstMod_1Op_Intrin_Pat : Pat < + (vt (node i1:$ftz, i32:$round, (vt (GPROrImm vt:$src0)))), + (inst imm:$ftz, imm:$round, $src0, bt) +>; + +class InstMod_2Op_Pat : Pat < + (vt (node (vt (GPROrImm vt:$src0)), (vt (GPROrImm vt:$src1)))), + (inst ftz, round, $src0, $src1, bt) +>; + +class InstMod_2Op_Intrin_Pat : Pat < + (vt (node i1:$ftz, + i32:$round, + (vt (GPROrImm vt:$src0)), + (vt (GPROrImm vt:$src1)))), + (inst imm:$ftz, imm:$round, $src0, $src1, bt) +>; + +class InstMod_2Op_NoRound_Intrin_Pat : Pat < + (vt (node i1:$ftz, + (vt (GPROrImm vt:$src0)), + (vt (GPROrImm vt:$src1)))), + (inst imm:$ftz, BrigRound.NONE, $src0, $src1, bt) +>; + +class InstMod_3Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0), (GPROrImm vt:$src1), (GPROrImm vt:$src2))), + (inst ftz, round, $src0, $src1, $src2, bt) +>; + +multiclass InstMod_1Op_FPTypes_Pat { + def : InstMod_1Op_Pat(inst#"_F32"), node, f32, BrigType.F32, round, 1>; + def : InstMod_1Op_Pat(inst#"_F64"), node, f64, BrigType.F64, round, 0>; +} + +multiclass InstMod_1Op_FPTypes_Intrin_Pat { + def : InstMod_1Op_Intrin_Pat(inst#"_F32"), node, f32, BrigType.F32>; + def : InstMod_1Op_Intrin_Pat(inst#"_F64"), node, f64, BrigType.F64>; +} + +multiclass InstMod_2Op_IntTypes_Pat { + def : InstMod_2Op_Pat(inst# !if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : InstMod_2Op_Pat(inst# !if(signed, "_S64", "_U64")), node, i64, + !if(signed, 
BrigType.S64, BrigType.U64) + >; +} + +multiclass InstMod_2Op_FPTypes_Pat { + def : InstMod_2Op_Pat(inst#"_F32"), node, f32, BrigType.F32, round, 1>; + def : InstMod_2Op_Pat(inst#"_F64"), node, f64, BrigType.F64, round, 0>; +} + +multiclass InstMod_2Op_Intrin_FPTypes_Pat { + def : InstMod_2Op_Intrin_Pat(inst#"_F32"), node, f32, BrigType.F32>; + def : InstMod_2Op_Intrin_Pat(inst#"_F64"), node, f64, BrigType.F64>; +} + +multiclass InstMod_3Op_FPTypes_Pat { + def : InstMod_3Op_Pat(inst#"_F32"), node, f32, BrigType.F32, BrigRound.FLOAT_DEFAULT, 1>; + def : InstMod_3Op_Pat(inst#"_F64"), node, f64, BrigType.F64, BrigRound.FLOAT_DEFAULT, 0>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SourceType instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstSourceType_1Op_Pat : Pat < + (destvt (node (srcvt (GPROrImm srcvt:$src0)))), + (inst $src0, destbt, srcbt) +>; + +class InstSourceType_2Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0), (GPROrImm vt:$src1))), + (inst $src0, $src1, destbt, srcbt) +>; + +class InstSourceType_Class_Pat : Pat < + (i1 (node (vt (GPROrImm vt:$src0)), (i32 (GPROrImm i32:$src1)))), + (inst $src0, $src1, BrigType.B1, srcbt) +>; + +class InstSourceType_3Op_Pat : Pat < + (vt (node (GPROrImm vt:$src0), (GPROrImm vt:$src1), (GPROrImm vt:$src2))), + (inst $src0, $src1, $src2, destbt, srcbt) +>; + +class InstSourceType_4Op_Pat : Pat < + (node (GPROrImm vt:$src0), + (GPROrImm vt:$src1), + (GPROrImm vt:$src2), + (GPROrImm vt:$src3)), + (inst $src0, $src1, $src2, $src3, destbt, srcbt) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Lane instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class ActiveLanePermutePat : Pat< + (vt (node (i32 timm:$width), + (vt (GPROrImm vt:$src0)), + (i32 (GPROrImm i32:$src1)), + (vt (GPROrImm vt:$src2)), + (i1 (GPROrImm i1:$src3)))), + (inst (i32 $width), $src0, $src1, $src2, $src3, bt, BrigType.NONE) +>; + +class ActiveLaneIdPat : Pat< + (vt (node (i32 timm:$width))), + (inst $width, bt, BrigType.NONE) +>; + +class ActiveLaneCountPat : Pat< + (vt (node (i32 timm:$width), (i1 (GPROrImm i1:$src0)))), + (inst $width, $src0, bt, BrigType.B1) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Br instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstBr_0Op_NoRet_Pat : Pat < + (node), + (inst width, BrigType.NONE) +>; + +class InstBr_0Op_NoRet_Intrin_Pat : Pat < + (node i32:$width), + (inst imm:$width, BrigType.NONE) +>; + +class InstBr_0Op_Pat : Pat < + (vt (node)), + (inst width, bt) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Seg instruction patterns +//////////////////////////////////////////////////////////////////////////////// + + +class InstSeg_0Op_Pat : Pat < + (vt (node)), + (inst segment, bt) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Addr instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class InstAddr_1Op_Pat : Pat < + (node ADDR:$address), + (inst segment, MEMOP:$address, bt) +>; + +//////////////////////////////////////////////////////////////////////////////// +// Atomic instruction patterns +//////////////////////////////////////////////////////////////////////////////// + +class 
AtomicPat_0Op_Pat : Pat< + (node (AtomicAddr AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address)), + (vt (inst inst.atomicOperation, + $segment, + $order, + $scope, + $equiv, + MEMOP:$address, + bt)) +>; + +class AtomicPat_1Op_Pat : Pat< + (node (AtomicAddr AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address), + (GPROrImm vt:$src0)), + (vt (inst inst.atomicOperation, + $segment, + $order, + $scope, + $equiv, + MEMOP:$address, + $src0, + bt)) +>; + +class AtomicPat_2Op_Pat : Pat< + (node (AtomicAddr AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address), + (GPROrImm vt:$src0), + (GPROrImm vt:$src1)), + (vt (inst inst.atomicOperation, + $segment, + $order, + $scope, + $equiv, + MEMOP:$address, + vt:$src0, + vt:$src1, + bt)) +>; + +class AtomicPat_1Op_NoRet_Pat : Pat< + (node (AtomicAddr AddressSpace:$segment, + BrigMemoryOrder:$order, + BrigMemoryScope:$scope, + equiv:$equiv, + MEMOP:$address), + (vt (GPROrImm vt:$src0))), + (inst inst.atomicOperation, + $segment, + $order, + $scope, + $equiv, + MEMOP:$address, + vt:$src0, + bt) +>; + +multiclass AtomicPat_0Op_BitTypes { + def : AtomicPat_0Op_Pat< + !cast(inst#"_B32"), node, i32, BrigType.B32 + >; + + def : AtomicPat_0Op_Pat< + !cast(inst#"_B64"), node, i64, BrigType.B64 + >; +} + +multiclass AtomicPat_1Op_IntTypes { + def : AtomicPat_1Op_Pat< + !cast(inst#!if(signed, "_S32", "_U32")), node, i32, + !if(signed, BrigType.S32, BrigType.U32) + >; + + def : AtomicPat_1Op_Pat< + !cast(inst#!if(signed, "_S64", "_U64")), node, i64, + !if(signed, BrigType.S64, BrigType.U64) + >; +} + +multiclass AtomicPat_1Op_BitTypes { + def : AtomicPat_1Op_Pat< + !cast(inst#"_B32"), node, i32, BrigType.B32 + >; + + def : AtomicPat_1Op_Pat< + !cast(inst#"_B64"), node, i64, BrigType.B64 + >; +} + +multiclass AtomicPat_1Op_NoRet_BitTypes { + def : AtomicPat_1Op_NoRet_Pat< + !cast(inst#"_B32"), node, i32, BrigType.B32 + >; + + def : AtomicPat_1Op_NoRet_Pat< + !cast(inst#"_B64"), node, i64, BrigType.B64 + >; +} + +multiclass AtomicPat_2Op_BitTypes { + def : AtomicPat_2Op_Pat< + !cast(inst#"_B32"), node, i32, BrigType.B32 + >; + + def : AtomicPat_2Op_Pat< + !cast(inst#"_B64"), node, i64, BrigType.B64 + >; +} + +def target : Operand {} Index: lib/Target/HSAIL/HSAILInstrInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstrInfo.h @@ -0,0 +1,200 @@ +//===- HSAILInstrInfo.h - HSAIL Instruction Information --------*- C++ -*- ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the HSAIL implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILINSTRINFO_H +#define LLVM_LIB_TARGET_HSAIL_HSAILINSTRINFO_H + +#include "HSAIL.h" +#include "HSAILRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "HSAILGenInstrInfo.inc" + +namespace llvm { +class HSAILSubtarget; +class RegScavenger; + +class HSAILInstrInfo : public HSAILGenInstrInfo { + const HSAILRegisterInfo RI; + +public: + explicit HSAILInstrInfo(HSAILSubtarget &st); + + ~HSAILInstrInfo(); + + const HSAILRegisterInfo &getRegisterInfo() const { return RI; } + + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, + unsigned &DstReg, unsigned &SubIdx) const override; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const override; + + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const override; + + unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const override; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify = false) const override; + + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + DebugLoc DL) const override; + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, unsigned SrcReg, + bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, unsigned DestReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + +public: + bool areLoadsFromSameBasePtr(SDNode *Node1, SDNode *Node2, int64_t &Offset1, + int64_t &Offset2) const override; + + bool shouldScheduleLoadsNear(SDNode *Node1, SDNode *Node2, int64_t Offset1, + int64_t Offset2, + unsigned NumLoads) const override; + + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; + + bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; + + RegScavenger *getRS() const { return RS; } + + bool expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const override; + + const TargetRegisterClass *getOpRegClass(const MachineRegisterInfo &MRI, + const MachineInstr &MI, + unsigned OpNo) const; + + bool verifyInstruction(const MachineInstr *MI, + StringRef &ErrInfo) const override; + + /// \brief Returns the operand named \p Op. If \p MI does not have an + /// operand named \c Op, this function returns nullptr. 
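+  ///
+  /// Typical use, with an operand name from the generated HSAIL::OpName
+  /// table (illustrative sketch; \c TII is this HSAILInstrInfo, \c IsSpill a
+  /// local flag):
+  /// \code
+  ///   if (const MachineOperand *Seg =
+  ///           TII->getNamedOperand(MI, HSAIL::OpName::segment))
+  ///     IsSpill = Seg->getImm() == HSAILAS::SPILL_ADDRESS;
+  /// \endcode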
+ MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const; + + const MachineOperand *getNamedOperand(const MachineInstr &MI, + unsigned OpName) const { + return getNamedOperand(const_cast(MI), OpName); + } + + int64_t getNamedModifierOperand(const MachineInstr &MI, + unsigned OpName) const { + return getNamedOperand(MI, OpName)->getImm(); + } + + int64_t getNamedModifierOperand(MachineInstr &MI, unsigned OpName) const { + return getNamedOperand(MI, OpName)->getImm(); + } + + bool isInstBasic(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstBasic; + } + + bool isInstMod(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstMod; + } + + bool isInstSourceType(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstSourceType; + } + + bool isInstLane(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstLane; + } + + bool isInstBr(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstBr; + } + + bool isInstSeg(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstSeg; + } + + bool isInstSegCvt(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstSegCvt; + } + + bool isInstMemFence(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstMemFence; + } + + bool isInstCmp(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstCmp; + } + + bool isInstMem(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstMem; + } + + bool isInstAtomic(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstAtomic; + } + + bool isInstImage(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstImage; + } + + bool isInstCvt(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstCvt; + } + + bool isInstAddr(uint16_t Opcode) const { + return get(Opcode).TSFlags & HSAILInstrFlags::InstAddr; + } + + uint16_t getBrigOpcode(uint16_t Opcode) const { + return (get(Opcode).TSFlags & HSAILInstrFlags::InstBrigOpcode) >> + Log2_64(HSAILInstrFlags::InstBrigOpcodeLo); + } + +private: + RegScavenger *RS; +}; + +namespace HSAIL { +int getAtomicRetOp(uint16_t Opcode); +int getAtomicNoRetOp(uint16_t Opcode); + +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); + +// FIXME: This is a thin wrapper around the similarly named and generated +// getLdStVectorOpcode, which we should use directly. +int getVectorLdStOpcode(uint16_t Opcode, unsigned vsize); +} + +} // End llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILInstrInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstrInfo.cpp @@ -0,0 +1,942 @@ +//===-- HSAILInstrInfo.cpp - HSAIL Instruction Information ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "HSAILInstrInfo.h"
+#include "HSAILBrigDefs.h"
+#include "HSAILTargetMachine.h"
+#include "HSAILUtilityFunctions.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <queue>
+
+#define GET_INSTRINFO_CTOR_DTOR
+#define GET_INSTRMAP_INFO
+#include "HSAILGenInstrInfo.inc"
+using namespace llvm;
+namespace llvm {
+
+static cl::opt<bool> DisableBranchAnalysis("disable-branch-analysis",
+                                           cl::Hidden,
+                                           cl::desc("Disable branch analysis"));
+static cl::opt<bool>
+    DisableCondReversion("disable-branch-cond-reversion", cl::Hidden,
+                         cl::desc("Disable branch condition reversion"));
+
+// Reverse conditions in branch analysis.
+// Marks whether or not we need to reverse the condition
+// when we insert a new branch.
+enum CondReverseFlag {
+  COND_IRREVERSIBLE,     // For branches that cannot be reversed
+  COND_REVERSE_POSITIVE, // Does not need inversion
+  COND_REVERSE_NEGATIVE, // Needs inversion
+  COND_REVERSE_DEPENDANT // Indicates that this condition has exactly
+                         // one dependency which should be reversed with it
+};
+
+HSAILInstrInfo::HSAILInstrInfo(HSAILSubtarget &st)
+    : HSAILGenInstrInfo(),
+      // : TargetInstrInfoImpl(HSAILInsts, array_lengthof(HSAILInsts)),
+      RI(st) {
+  RS = new RegScavenger();
+}
+
+HSAILInstrInfo::~HSAILInstrInfo() { delete RS; }
+
+bool HSAILInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+                                           unsigned &SrcReg, unsigned &DstReg,
+                                           unsigned &SubIdx) const {
+  // HSAIL does not have any registers that overlap and cause
+  // an extension.
+  return false;
+}
+
+unsigned HSAILInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                             int &FrameIndex) const {
+  const MCInstrDesc &MCID = get(MI->getOpcode());
+  if (!MCID.mayLoad() || !MI->hasOneMemOperand())
+    return HSAIL::NoRegister;
+
+  const MachineOperand *Segment = getNamedOperand(*MI, HSAIL::OpName::segment);
+  if (!Segment || Segment->getImm() != HSAILAS::SPILL_ADDRESS)
+    return HSAIL::NoRegister;
+
+  int AddressIdx =
+      HSAIL::getNamedOperandIdx(MI->getOpcode(), HSAIL::OpName::address);
+  const MachineOperand &Base = MI->getOperand(AddressIdx + HSAILADDRESS::BASE);
+
+  if (Base.isFI()) {
+    FrameIndex = Base.getIndex();
+    return MI->getOperand(0).getReg();
+  }
+
+  return HSAIL::NoRegister;
+}
+
+unsigned HSAILInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+                                                   int &FrameIndex) const {
+  return isLoadFromStackSlot(MI, FrameIndex);
+}
+
+unsigned HSAILInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                            int &FrameIndex) const {
+  const MCInstrDesc &MCID = get(MI->getOpcode());
+  if (!MCID.mayStore() || !MI->hasOneMemOperand())
+    return 0;
+
+  const MachineOperand *Segment = getNamedOperand(*MI, HSAIL::OpName::segment);
+  if (!Segment || Segment->getImm() != HSAILAS::SPILL_ADDRESS)
+    return HSAIL::NoRegister;
+
+  int AddressIdx =
+      HSAIL::getNamedOperandIdx(MI->getOpcode(), HSAIL::OpName::address);
+  const MachineOperand &Base = MI->getOperand(AddressIdx + HSAILADDRESS::BASE);
+  if (Base.isFI()) {
+    FrameIndex = Base.getIndex();
+    return MI->getOperand(0).getReg();
+  }
+
+  return HSAIL::NoRegister;
+}
+
+unsigned HSAILInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+                                                  int &FrameIndex) const {
+  return isStoreToStackSlot(MI, FrameIndex);
+}
+
+static bool IsDefBeforeUse(MachineBasicBlock &MBB, unsigned Reg,
+                           const MachineRegisterInfo &MRI, bool 
&CanReverse) { + // TODO_HSA: With LiveVariable analysis we can make it + // a lot more effectively. + // But currently we can not rely on any of the analysis results + // In latest llvm exist MRI::tracksLiveness flag + // if it is true we don't need this costly bfs search + + CanReverse = true; + + if (MRI.hasOneUse(Reg)) + return true; + + std::queue Q; + SmallPtrSet Visited; + + Q.push(&MBB); + + while (!Q.empty()) { + MachineBasicBlock *cur_mbb = Q.front(); + Q.pop(); + + for (MachineBasicBlock::succ_iterator succ = cur_mbb->succ_begin(), + succ_end = cur_mbb->succ_end(); + succ != succ_end; ++succ) + if (!Visited.count(*succ)) { + Visited.insert(*succ); + + bool need_process_futher = true; + + // Process basic block + for (MachineBasicBlock::iterator instr = (*succ)->begin(), + instr_end = (*succ)->end(); + instr != instr_end; ++instr) { + if (instr->readsRegister(Reg)) { + // Always abort on circular dependencies + // Which will require to insert or remove not + if (instr->getParent() == &MBB && + (instr->isBranch() || (instr->getOpcode() == HSAIL::NOT_B1))) { + CanReverse = false; + } + + return false; + } + if (instr->definesRegister(Reg)) { + need_process_futher = false; + break; + } + } + + // Schedule basic block + if (need_process_futher) + Q.push(*succ); + } + } + + return true; +} + +static bool CheckSpillAfterDef(MachineInstr *start, unsigned reg, + bool &canBeSpilled) { + MachineBasicBlock *MBB = start->getParent(); + MachineBasicBlock::reverse_iterator B(start); + MachineBasicBlock::reverse_iterator E = MBB->rend(); + if (E == B) + return false; // empty block check + ++B; // skip branch instr itself + for (MachineBasicBlock::reverse_iterator I = B; I != E; ++I) { + if (I->definesRegister(reg)) { + return true; + } + if (I->readsRegister(reg) && (HSAIL::isConv(&*I) || I->mayStore())) { + canBeSpilled = true; + return true; + } + } + return false; +} + +static bool IsSpilledAfterDef(MachineInstr *start, unsigned reg) { + bool canBeSpilled = false; + if (!CheckSpillAfterDef(start, reg, canBeSpilled)) { + std::queue Q; + SmallPtrSet Visited; + MachineBasicBlock *MBB = start->getParent(); + Q.push(MBB); + while (!Q.empty() && !canBeSpilled) { + MachineBasicBlock *cur_mbb = Q.front(); + Q.pop(); + for (MachineBasicBlock::pred_iterator pred = cur_mbb->pred_begin(), + pred_end = cur_mbb->pred_end(); + pred != pred_end; ++pred) { + if (!Visited.count(*pred) && !(*pred)->empty()) { + Visited.insert(*pred); + MachineInstr *instr; + MachineBasicBlock::instr_iterator termIt = + (*pred)->getFirstInstrTerminator(); + if (termIt == (*pred)->instr_end()) { + instr = &*(*pred)->rbegin(); + } else { + instr = termIt; + } + if (!CheckSpillAfterDef(instr, reg, canBeSpilled)) { + Q.push(*pred); + } + } + } + } + } + return canBeSpilled; +} + +bool HSAILInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + if (DisableBranchAnalysis) + return true; + + // Start from the bottom of the block and work up, examining the + // terminator instructions. + MachineBasicBlock::iterator I = MBB.end(); + MachineBasicBlock::iterator UnCondBrIter = MBB.end(); + + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + + // Working from the bottom, when we see a non-terminator instruction, we're + // done. 
+ if (!isUnpredicatedTerminator(I)) + break; + + // A terminator that isn't a branch can't easily be handled by this + // analysis. + if (!I->getDesc().isBranch()) + return true; + + // Handle unconditional branches. + if (I->getOpcode() == HSAIL::BR) { + int Src0Idx = HSAIL::getNamedOperandIdx(HSAIL::BR, HSAIL::OpName::src0); + UnCondBrIter = I; + + Cond.clear(); + FBB = 0; + + if (!AllowModify) { + TBB = I->getOperand(Src0Idx).getMBB(); + continue; + } + + // If the block has any instructions after a JMP, delete them. + while (std::next(I) != MBB.end()) + std::next(I)->eraseFromParent(); + + // Delete the JMP if it's equivalent to a fall-through. + if (MBB.isLayoutSuccessor(I->getOperand(Src0Idx).getMBB())) { + TBB = 0; + I->eraseFromParent(); + I = MBB.end(); + UnCondBrIter = MBB.end(); + continue; + } + + // TBB is used to indicate the unconditional destination. + TBB = I->getOperand(Src0Idx).getMBB(); + continue; + } + + // Handle conditional branches. + + // First conditional branch + if (Cond.empty()) { + int Src0Idx = HSAIL::getNamedOperandIdx(HSAIL::CBR, HSAIL::OpName::src0); + int Src1Idx = HSAIL::getNamedOperandIdx(HSAIL::CBR, HSAIL::OpName::src1); + + FBB = TBB; + TBB = I->getOperand(Src1Idx).getMBB(); + + // Insert condition as pair - (register, reverse flag) + // Or in case if there is dependencies + // (register, COND_REVERSE_DEPENDANT, free reg num, reverse flag) + Cond.push_back(I->getOperand(Src0Idx)); + + if (DisableCondReversion) { + Cond.push_back(MachineOperand::CreateImm(COND_IRREVERSIBLE)); + continue; + } + + // Determine condition dependencies + unsigned reg = I->getOperand(Src0Idx).getReg(); + bool can_reverse = false; + bool is_def_before_use = IsDefBeforeUse(MBB, reg, MRI, can_reverse); + if (can_reverse) { + /* Here we're taking care of the possible control register spilling + that occur between it's definition and branch. If it does, we're not + allowed to inverse branch because some other place rely on the + unspilled value. + */ + can_reverse = !IsSpilledAfterDef(I, reg); + } + // Can not reverse instruction which will require to + // insert or remove 'not_b1' inside loop + // Also, we avoid reversing for that comparisons + // whose result is spilled in between the definition and use. 
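+      // Illustrative hazard: if the CR register feeding this branch is also
+      // consumed by a NOT_B1 or another branch reached again from this block
+      // (a loop), flipping the compare here would silently change those other
+      // uses too.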
+ if (!can_reverse) { + Cond.push_back(MachineOperand::CreateImm(COND_IRREVERSIBLE)); + continue; + } + + // If there is no uses of condition register we can just reverse + // instruction and be fine + if (is_def_before_use) { + Cond.push_back(MachineOperand::CreateImm(COND_REVERSE_POSITIVE)); + continue; + } + + // There is uses of this instruction somewhere down the control flow + // Try to use RegisterScavenger to get free register + // If there is no such one than do not inverse condition + if (!MRI.tracksLiveness()) { + Cond.push_back(MachineOperand::CreateImm(COND_IRREVERSIBLE)); + continue; + } + + unsigned free_reg = 0; + if (!TargetRegisterInfo::isVirtualRegister(Cond[0].getReg())) { + RS->enterBasicBlock(&MBB); + RS->forward(std::prev(MBB.end())); + + free_reg = RS->FindUnusedReg(&HSAIL::CRRegClass); + if (free_reg == 0) { + Cond.push_back(MachineOperand::CreateImm(COND_IRREVERSIBLE)); + continue; + } + RS->setRegUsed(free_reg); + } + + // Everything is ok - mark condition as reversible + Cond.push_back(MachineOperand::CreateImm(COND_REVERSE_DEPENDANT)); + Cond.push_back(MachineOperand::CreateImm(free_reg)); + Cond.push_back(MachineOperand::CreateImm(COND_REVERSE_POSITIVE)); + continue; + } + + // Can not handle more than one conditional branch + return true; + } + + return false; +} + +static BrigCompareOperation invIntCondOp(BrigCompareOperation Op) { + switch (Op) { + case BRIG_COMPARE_EQ: + return BRIG_COMPARE_NE; + case BRIG_COMPARE_GE: + return BRIG_COMPARE_LT; + case BRIG_COMPARE_GT: + return BRIG_COMPARE_LE; + case BRIG_COMPARE_LE: + return BRIG_COMPARE_GT; + case BRIG_COMPARE_LT: + return BRIG_COMPARE_GE; + case BRIG_COMPARE_NE: + return BRIG_COMPARE_EQ; + default: + return Op; + } +} + +static BrigCompareOperation invFPCondOp(BrigCompareOperation Op) { + switch (Op) { + case BRIG_COMPARE_NUM: + return BRIG_COMPARE_NAN; + case BRIG_COMPARE_EQ: + return BRIG_COMPARE_NEU; + case BRIG_COMPARE_GE: + return BRIG_COMPARE_LTU; + case BRIG_COMPARE_GT: + return BRIG_COMPARE_LEU; + case BRIG_COMPARE_LE: + return BRIG_COMPARE_GTU; + case BRIG_COMPARE_LT: + return BRIG_COMPARE_GEU; + case BRIG_COMPARE_NE: + return BRIG_COMPARE_EQU; + case BRIG_COMPARE_EQU: + return BRIG_COMPARE_NE; + case BRIG_COMPARE_GEU: + return BRIG_COMPARE_LT; + case BRIG_COMPARE_GTU: + return BRIG_COMPARE_LE; + case BRIG_COMPARE_LEU: + return BRIG_COMPARE_GT; + case BRIG_COMPARE_LTU: + return BRIG_COMPARE_GE; + case BRIG_COMPARE_NEU: + return BRIG_COMPARE_EQ; + case BRIG_COMPARE_NAN: + return BRIG_COMPARE_NUM; + default: + return Op; + } +} + +static bool isFPBrigType(BrigType BT) { + switch (BT) { + case BRIG_TYPE_F32: + case BRIG_TYPE_F64: + case BRIG_TYPE_F16: + return true; + default: + return false; + } +} + +// Helper for `HSAILInstrInfo::InsertBranch` +// Reverse branch condition +// Different from `HSAILInstrInfo::ReverseBranchCondition` +// because it actually generates reversion code +// Returns register with condition result +static unsigned GenerateBranchCondReversion(MachineBasicBlock &MBB, + const MachineOperand &CondOp, + const HSAILInstrInfo *TII, + DebugLoc DL) { + assert(CondOp.isReg()); + unsigned cond_reg = CondOp.getReg(); + + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + MachineInstr *cond_expr = nullptr; + + // Manualy search for latest usage of condition register in MBB + MachineBasicBlock::iterator I = MBB.end(); + + while (I != MBB.begin()) { + --I; + + if (I->definesRegister(cond_reg)) { + cond_expr = &*I; + break; + } + } + + // If condition is compare instruction - 
reverse it + bool need_insert_not = false; + if (cond_expr && cond_expr->isCompare()) { + MachineOperand *CmpOp = TII->getNamedOperand(*cond_expr, HSAIL::OpName::op); + + BrigType CmpType = static_cast( + TII->getNamedOperand(*cond_expr, HSAIL::OpName::sourceType)->getImm()); + + BrigCompareOperation OrigOp = + static_cast(CmpOp->getImm()); + + BrigCompareOperation RevOp = + isFPBrigType(CmpType) ? invFPCondOp(OrigOp) : invIntCondOp(OrigOp); + + if (OrigOp != RevOp) // Can invert the operation. + CmpOp->setImm(RevOp); + else + need_insert_not = true; + } + // If condition is logical not - just remove it + else if (cond_expr && cond_expr->getOpcode() == HSAIL::NOT_B1) { + cond_reg = cond_expr->getOperand(1).getReg(); + cond_expr->eraseFromParent(); + } else + need_insert_not = true; + + // Else insert logical not + if (need_insert_not) { + // If we are before register allocation we need to maintain SSA form + if (TargetRegisterInfo::isVirtualRegister(CondOp.getReg())) + cond_reg = MRI.createVirtualRegister(MRI.getRegClass(CondOp.getReg())); + + BuildMI(&MBB, DL, TII->get(HSAIL::NOT_B1)) + .addReg(cond_reg, RegState::Define) + .addReg(CondOp.getReg()) + .addImm(BRIG_TYPE_B1); + } + + return cond_reg; +} + +unsigned HSAILInstrInfo::InsertBranch( + MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, + ArrayRef Cond, DebugLoc DL) const { + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + + if (Cond.empty()) { + // Unconditional branch? + assert(!FBB && "Unconditional branch with multiple successors!"); + BuildMI(&MBB, DL, get(HSAIL::BR)) + .addImm(BRIG_WIDTH_ALL) + .addMBB(TBB) + .addImm(BRIG_TYPE_NONE); + return 1; + } + + // AnalyzeBranch can handle only one condition + if (Cond.size() != 2 && Cond.size() != 4) + return 0; + + // Conditional branch. + // According to HSAIL spec condition MUST be a control register + assert(Cond[0].isReg()); + unsigned cond_reg = Cond[0].getReg(); + + // Reverse condition + switch (static_cast(Cond[1].getImm())) { + case COND_REVERSE_DEPENDANT: + assert(Cond.size() == 4 && Cond[2].isImm()); + + if (Cond[3].getImm() == COND_REVERSE_NEGATIVE) { + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + if (TargetRegisterInfo::isVirtualRegister(Cond[0].getReg())) + cond_reg = MRI.createVirtualRegister(MRI.getRegClass(Cond[0].getReg())); + else + cond_reg = Cond[2].getImm(); + + BuildMI(&MBB, DL, get(HSAIL::NOT_B1)) + .addReg(cond_reg, RegState::Define) + .addReg(Cond[0].getReg()) + .addImm(BRIG_TYPE_B1); + } + + break; + case COND_REVERSE_NEGATIVE: + cond_reg = GenerateBranchCondReversion(MBB, Cond[0], this, DL); + break; + case COND_REVERSE_POSITIVE: + case COND_IRREVERSIBLE: + // Do nothing + break; + } + + unsigned Count = 0; + + BuildMI(&MBB, DL, get(HSAIL::CBR)) + .addImm(BRIG_WIDTH_1) + .addReg(cond_reg) + .addMBB(TBB) + .addImm(BRIG_TYPE_B1); + + ++Count; + + if (FBB) { + // Two-way Conditional branch. Insert the second branch. + BuildMI(&MBB, DL, get(HSAIL::BR)) + .addImm(BRIG_WIDTH_ALL) + .addMBB(FBB) + .addImm(BRIG_TYPE_NONE); + + ++Count; + } + + return Count; +} + +unsigned int HSAILInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + unsigned Count = 0; + + while (I != MBB.begin()) { + --I; + if (I->isDebugValue()) + continue; + + if (I->getOpcode() != HSAIL::BR && I->getOpcode() != HSAIL::CBR) + break; + + // Remove the branch. 
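+    // (Only BR / CBR terminators reach this point; the opcode check above
+    // stops the scan at anything else.)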
+ I->eraseFromParent(); + I = MBB.end(); + ++Count; + } + + return Count; +} + +void HSAILInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, + int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + unsigned int Opc = 0; + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + DebugLoc DL; + + unsigned BT; + switch (RC->getID()) { + default: + llvm_unreachable("unrecognized TargetRegisterClass"); + break; + case HSAIL::GPR32RegClassID: + Opc = HSAIL::ST_U32; + BT = BRIG_TYPE_U32; + break; + case HSAIL::GPR64RegClassID: + Opc = HSAIL::ST_U64; + BT = BRIG_TYPE_U64; + break; + case HSAIL::CRRegClassID: { + HSAILMachineFunctionInfo *MFI = MF.getInfo(); + MFI->setHasSpilledCRs(); + Opc = HSAIL::SPILL_B1; + BT = BRIG_TYPE_B1; + break; + } + } + if (MI != MBB.end()) { + DL = MI->getDebugLoc(); + } + + switch (RC->getID()) { + default: + llvm_unreachable("unrecognized TargetRegisterClass"); + break; + case HSAIL::CRRegClassID: + case HSAIL::GPR32RegClassID: + case HSAIL::GPR64RegClassID: { + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(FrameIndex), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); + + BuildMI(MBB, MI, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)) // src + .addFrameIndex(FrameIndex) // address_base + .addReg(HSAIL::NoRegister) // address_reg + .addImm(0) // address_offset + .addImm(BT) // TypeLength + .addImm(HSAILAS::SPILL_ADDRESS) // segment + .addImm(MMO->getAlignment()) + .addMemOperand(MMO); + break; + } + } +} + +void HSAILInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + unsigned int Opc = 0; + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + DebugLoc DL; + + unsigned BT; + switch (RC->getID()) { + default: + llvm_unreachable("unrecognized TargetRegisterClass"); + break; + case HSAIL::GPR32RegClassID: + Opc = HSAIL::LD_U32; + BT = BRIG_TYPE_U32; + break; + case HSAIL::GPR64RegClassID: + Opc = HSAIL::LD_U64; + BT = BRIG_TYPE_U64; + break; + case HSAIL::CRRegClassID: + Opc = HSAIL::RESTORE_B1; + BT = BRIG_TYPE_B1; + break; + } + if (MI != MBB.end()) { + DL = MI->getDebugLoc(); + } + + switch (RC->getID()) { + default: + llvm_unreachable("unrecognized TargetRegisterClass"); + break; + case HSAIL::GPR32RegClassID: + case HSAIL::GPR64RegClassID: + case HSAIL::CRRegClassID: { + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(FrameIndex), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); + BuildMI(MBB, MI, DL, get(Opc)) + .addReg(DestReg, RegState::Define) // dest + .addFrameIndex(FrameIndex) // address_base + .addReg(HSAIL::NoRegister) // address_reg + .addImm(0) // address_offset + .addImm(BT) // TypeLength + .addImm(HSAILAS::SPILL_ADDRESS) // segment + .addImm(MMO->getAlignment()) // align + .addImm(BRIG_WIDTH_1) // width + .addImm(0) // mask + .addMemOperand(MMO); + break; + } + } +} + +bool HSAILInstrInfo::areLoadsFromSameBasePtr(SDNode *Node1, SDNode *Node2, + int64_t &Offset1, + int64_t &Offset2) const { + // Warning! This function will handle not only load but store nodes too + // because there is no real difference between memory operands in loads and + // stores. 
+ // Do not change name of this function to avoid more changes in core llvm. + + if (!Node1->isMachineOpcode() || !Node2->isMachineOpcode()) + return false; + + MachineSDNode *mnode1 = cast(Node1), + *mnode2 = cast(Node2); + + if (mnode1->memoperands_empty() || mnode2->memoperands_empty()) + return false; + + if ((mnode1->memoperands_begin() + 1) != mnode1->memoperands_end() || + (mnode2->memoperands_begin() + 1) != mnode2->memoperands_end()) + return false; + + MachineMemOperand *mo1, *mo2; + + mo1 = *mnode1->memoperands_begin(); + mo2 = *mnode2->memoperands_begin(); + + // TODO_HSA: Consider extension types to be checked explicitly + if (mo1->getSize() != mo2->getSize() || + mo1->getPointerInfo().getAddrSpace() != + mo2->getPointerInfo().getAddrSpace() || + mo1->getValue() != mo2->getValue() || + mo1->getFlags() != mo2->getFlags()) { + return false; + } + + Offset1 = mo1->getOffset(); + Offset2 = mo2->getOffset(); + + return true; +} + +bool HSAILInstrInfo::shouldScheduleLoadsNear(SDNode *Node1, SDNode *Node2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + // Warning! This function will handle not only load but store nodes too + // because there is no real difference between memory operands in loads and + // stores. + + // Assume that 'areLoadsFromSameBasePtr' returned true + + if (!Node1->isMachineOpcode()) + return false; + + MachineSDNode *mnode1 = cast(Node1); + + // Check that loads are close enough + if (Offset2 - Offset1 <= + 4 * (int64_t)(*mnode1->memoperands_begin())->getSize()) + return true; + return false; +} + +bool HSAILInstrInfo::ReverseBranchCondition( + SmallVectorImpl &Cond) const { + if (Cond.size() < 2) + return true; + + // AnalyzeBranch should always return conditions as pairs + assert(Cond.size() % 2 == 0); + + for (SmallVectorImpl::iterator I = Cond.begin(), + E = Cond.end(); + I != E; ++I) { + ++I; + if (static_cast(I->getImm()) == COND_IRREVERSIBLE) + return true; + } + + for (SmallVectorImpl::iterator I = Cond.begin(), + E = Cond.end(); + I != E; ++I) { + ++I; + + assert(I->isImm()); + + CondReverseFlag cond_rev_flag = static_cast(I->getImm()); + + switch (cond_rev_flag) { + case COND_REVERSE_POSITIVE: + cond_rev_flag = COND_REVERSE_NEGATIVE; + break; + case COND_REVERSE_NEGATIVE: + cond_rev_flag = COND_REVERSE_POSITIVE; + break; + case COND_REVERSE_DEPENDANT: + cond_rev_flag = COND_REVERSE_DEPENDANT; + break; + default: + llvm_unreachable("Unknown cond reverse flag"); + } + + I->setImm(cond_rev_flag); + } + + return false; +} + +bool HSAILInstrInfo::isSafeToMoveRegClassDefs( + const TargetRegisterClass *RC) const { + // Micah: HSAIL does not have any constraints about moving defs. 
+ return true; +} + +void HSAILInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (HSAIL::GPR32RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(HSAIL::MOV_B32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(BRIG_TYPE_B32); + return; + } + + if (HSAIL::GPR64RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(HSAIL::MOV_B64), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(BRIG_TYPE_B64); + return; + } + + if (HSAIL::CRRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, MI, DL, get(HSAIL::MOV_B1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(BRIG_TYPE_B1); + return; + } + + unsigned SrcBT = -1; + unsigned DestBT = -1; + unsigned CvtOpc = -1; + + if (HSAIL::GPR32RegClass.contains(DestReg) && + HSAIL::CRRegClass.contains(SrcReg)) { + DestBT = BRIG_TYPE_B1; + SrcBT = BRIG_TYPE_U32; + CvtOpc = HSAIL::CVT_B1_U32; + } else if (HSAIL::CRRegClass.contains(DestReg) && + HSAIL::GPR32RegClass.contains(SrcReg)) { + DestBT = BRIG_TYPE_U32; + SrcBT = BRIG_TYPE_B1; + CvtOpc = HSAIL::CVT_U32_B1; + } else if (HSAIL::GPR64RegClass.contains(DestReg) && + HSAIL::GPR32RegClass.contains(SrcReg)) { + DestBT = BRIG_TYPE_U32; + SrcBT = BRIG_TYPE_U64; + CvtOpc = HSAIL::CVT_U32_U64; + } else if (HSAIL::GPR32RegClass.contains(DestReg) && + HSAIL::GPR64RegClass.contains(SrcReg)) { + // Truncation can occur if a function was defined with different return + // types in different places. + DestBT = BRIG_TYPE_U64; + SrcBT = BRIG_TYPE_U32; + CvtOpc = HSAIL::CVT_U64_U32; + } else { + assert(!"When do we hit this?"); + return TargetInstrInfo::copyPhysReg(MBB, MI, DL, DestReg, SrcReg, KillSrc); + } + + BuildMI(MBB, MI, DL, get(CvtOpc), DestReg) + .addImm(0) // ftz + .addImm(0) // round + .addImm(DestBT) // destTypedestLength + .addImm(SrcBT) // srcTypesrcLength + .addReg(SrcReg, getKillRegState(KillSrc)); +} + +bool HSAILInstrInfo::expandPostRAPseudo( + MachineBasicBlock::iterator MBBI) const { + MachineInstr &MI = *MBBI; + return HSAILGenInstrInfo::expandPostRAPseudo(MI); +} + +const TargetRegisterClass * +HSAILInstrInfo::getOpRegClass(const MachineRegisterInfo &MRI, + const MachineInstr &MI, unsigned OpNo) const { + + const MachineOperand &MO = MI.getOperand(OpNo); + if (!MO.isReg()) + return nullptr; + + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return MRI.getRegClass(Reg); + + return RI.getPhysRegClass(Reg); +} + +bool HSAILInstrInfo::verifyInstruction(const MachineInstr *MI, + StringRef &ErrInfo) const { + return true; +} + +MachineOperand *HSAILInstrInfo::getNamedOperand(MachineInstr &MI, + unsigned OperandName) const { + int Idx = HSAIL::getNamedOperandIdx(MI.getOpcode(), OperandName); + if (Idx == -1) + return nullptr; + + return &MI.getOperand(Idx); +} +} + +// FIXME: Should just use generated version directly. +int HSAIL::getVectorLdStOpcode(uint16_t Opcode, unsigned vsize) { + // HSAIL::vec_size enum is generated from instruction mappings and defined in + // HSAILGenInstrInfo.inc. It starts with vec_size_1 value which is equal to + // zero, so we need to subtract one from size. 
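+  // E.g. a caller asking for the 3-wide form passes vsize == 3, which maps to
+  // HSAIL::vec_size(2) in the generated table.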
+ return HSAIL::getLdStVectorOpcode(Opcode, HSAIL::vec_size(vsize - 1)); +} Index: lib/Target/HSAIL/HSAILInstrInfo.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstrInfo.td @@ -0,0 +1,444 @@ +//==- HSAILInstrInfo.td - Main HSAIL Instruction Definition -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the HSAIL instruction set, defining the instructions, and +// properties of the instructions which are needed for code generation, machine +// code emission, and analysis. +// +//===----------------------------------------------------------------------===// + +def SmallModel : Predicate<"Subtarget->isSmallModel()">; +def LargeModel : Predicate<"Subtarget->isLargeModel()">; +def EnableOpt : Predicate<"CodeGenOpt::None != TM.getOptLevel()">; + +// Mark the default value for a width modifier. This only effects how +// the instruction is printed in the non-BRIG path. +class WidthAttrValues_ { + bits<2> NONE = 0; + bits<2> ALL = 1; + bits<2> WAVESIZE = 2; + bits<2> ONE = 3; +} + +def WidthAttrValues : WidthAttrValues_; + + +class AtomicNoRet { + string NoRetOp = noRetOp; + bit IsRet = isRet; +} + +// Maps an atomic opcode to its version with a return value. +def getAtomicRetOp : InstrMapping { + let FilterClass = "AtomicNoRet"; + let RowFields = ["NoRetOp"]; + let ColFields = ["IsRet"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + +// Maps an atomic opcode to its returnless version. +def getAtomicNoRetOp : InstrMapping { + let FilterClass = "AtomicNoRet"; + let RowFields = ["NoRetOp"]; + let ColFields = ["IsRet"]; + let KeyCol = ["1"]; + let ValueCols = [["0"]]; +} + +class LdStVectorMap { + string opcode = op; + int vec_size = size; +} + +def getLdStVectorOpcode : InstrMapping { + let FilterClass = "LdStVectorMap"; + let RowFields = ["opcode"]; + let ColFields = ["vec_size"]; + let KeyCol = ["1"]; + let ValueCols = [["1"], ["2"], ["3"], ["4"]]; +} + +class HSAILDestOperand : RegisterOperand ; + +// Normal source operand which can be an immediate or a register. 
+class HSAILSrcOperand : RegisterOperand { + let OperandNamespace = "HSAIL"; + let OperandType = "OPERAND_REG_IMM"; +} + +def HSAILDest1Operand : HSAILDestOperand; +def HSAILDest32Operand : HSAILDestOperand; +def HSAILDest64Operand : HSAILDestOperand; + +def HSAILSrc1Operand : HSAILSrcOperand; +def HSAILSrc32Operand : HSAILSrcOperand; +def HSAILSrc64Operand : HSAILSrcOperand; + + +class getRegOpForVT { + RegisterOperand ret = !if(!eq(VT.Size, 32), HSAILSrc32Operand, + !if(!eq(VT.Size, 64), HSAILSrc64Operand, + HSAILSrc1Operand)); // else VT == i1 +} + +class getDestRegOpForVT { + RegisterOperand ret = !if(!eq(VT.Size, 32), HSAILDest32Operand, + !if(!eq(VT.Size, 64), HSAILDest64Operand, + HSAILDest1Operand)); // else VT == i1 +} + +class getRegClassForVT { + RegisterClass ret = !if(!eq(VT.Size, 32), GPR32, + !if(!eq(VT.Size, 64), GPR64, + CR)); // else VT == i1 +} + +class getDestRegClassForVT { + RegisterClass ret = !if(!eq(VT.Size, 32), GPR32, + !if(!eq(VT.Size, 64), GPR64, + CR)); // else VT == i1 +} + +class BRIGType { + field int BT = bt; + field ValueType VT = vt; + field string Name = name; + field string InstName = instName; + field RegisterClass SrcRC = getRegClassForVT.ret; +} + +class HSAILOperand : RegisterOperand { + field BRIGType BT = ty; + field ValueType VT = ty.VT; + let PrintMethod = pm; + + let OperandNamespace = "HSAIL"; + let OperandType = "OPERAND_REG_IMM"; +} + +class HSAILProfile ArgBT> { + field ValueType DestVT = ArgBT[0].VT; + field ValueType Src0VT = ArgBT[1].VT; + field ValueType Src1VT = ArgBT[2].VT; + field ValueType Src2VT = ArgBT[3].VT; + field ValueType Src3VT = ArgBT[4].VT; + + field HSAILOperand DestRC = ArgBT[0]; + field HSAILOperand Src0RC = ArgBT[1]; + field HSAILOperand Src1RC = ArgBT[2]; + field HSAILOperand Src2RC = ArgBT[3]; + field HSAILOperand Src3RC = ArgBT[4]; +} + +//===----------------------------------------------------------------------===// +// Custom Operands +//===----------------------------------------------------------------------===// +include "HSAILOperands.td" +include "HSAILEnums.td" + +def UntypedTy : BRIGType; + +def B1Ty : BRIGType; +def B32Ty : BRIGType; +def B64Ty : BRIGType; + +def S32Ty : BRIGType; +def S64Ty : BRIGType; + +def U32Ty : BRIGType; +def U64Ty : BRIGType; + +// Deal with cases that still assume f16 is an i32. 
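+// (F16Ty_i32 below presumably pairs BRIG f16 with an i32 value type, and thus
+// with GPR32, since getRegClassForVT above dispatches purely on bit width:
+// 32-bit value types map to GPR32, 64-bit ones to GPR64, and anything else,
+// in practice i1, to CR. F16Ty_f32 and F16Ty keep floating-point value types.)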
+def F16Ty_i32 : BRIGType; +def F16Ty_f32 : BRIGType; + +def F16Ty : BRIGType; +def F32Ty : BRIGType; +def F64Ty : BRIGType; + +def U8X4Ty : BRIGType; +def U8X8Ty : BRIGType; + +def S8X4Ty : BRIGType; +def S8X8Ty : BRIGType; + +def U16X2Ty : BRIGType; + +def U16X4Ty : BRIGType; +def S16X4Ty : BRIGType; + +def U32X2Ty : BRIGType; +def S32X2Ty : BRIGType; + +def F16X2Ty : BRIGType; +def F16X4Ty : BRIGType; + +def UntypedOp : HSAILOperand; + +def B1Op : HSAILOperand; +def B32Op : HSAILOperand; +def B64Op : HSAILOperand; + +def F16Op : HSAILOperand; + +def S32Op : HSAILOperand; +def U32Op : HSAILOperand; +def F32Op : HSAILOperand; + +def S64Op : HSAILOperand; +def U64Op : HSAILOperand; +def F64Op : HSAILOperand; + +def U8X4Op : HSAILOperand; +def U8X8Op : HSAILOperand; + +def S8X4Op : HSAILOperand; +def S8X8Op : HSAILOperand; + +def U16X2Op : HSAILOperand; + +def U16X4Op : HSAILOperand; +def S16X4Op : HSAILOperand; + +def U32X2Op : HSAILOperand; +def S32X2Op : HSAILOperand; + +def F16X2Op : HSAILOperand; +def F16X4Op : HSAILOperand; + +def Vec2SrcOpU32 : Operand { + let MIOperandInfo = (ops U32Op, U32Op); + let PrintMethod = "printV2U32"; +} + +def Vec2SrcOpF32 : Operand { + let MIOperandInfo = (ops F32Op, F32Op); + let PrintMethod = "printV2F32"; +} + +def Vec2SrcOpU64 : Operand { + let MIOperandInfo = (ops U64Op, U64Op); + let PrintMethod = "printV2U64"; +} + +def Vec2SrcOpF64 : Operand { + let MIOperandInfo = (ops F64Op, F64Op); + let PrintMethod = "printV2F64"; +} + + +def Vec3SrcOpU32 : Operand { + let MIOperandInfo = (ops U32Op, U32Op, U32Op); + let PrintMethod = "printV3U32"; +} + +def Vec3SrcOpF32 : Operand { + let MIOperandInfo = (ops F32Op, F32Op, F32Op); + let PrintMethod = "printV3F32"; +} + +def Vec3SrcOpU64 : Operand { + let MIOperandInfo = (ops U64Op, U64Op, U64Op); + let PrintMethod = "printV3U64"; +} + +def Vec3SrcOpF64 : Operand { + let MIOperandInfo = (ops F64Op, F64Op, F64Op); + let PrintMethod = "printV3F64"; +} + + +def Vec4SrcOpU32 : Operand { + let MIOperandInfo = (ops U32Op, U32Op, U32Op, U32Op); + let PrintMethod = "printV4U32"; +} + +def Vec4SrcOpF32 : Operand { + let MIOperandInfo = (ops F32Op, F32Op, F32Op, F32Op); + let PrintMethod = "printV4F32"; +} + +def Vec4SrcOpU64 : Operand { + let MIOperandInfo = (ops U64Op, U64Op, U64Op, U64Op); + let PrintMethod = "printV4U64"; +} + +def Vec4SrcOpF64 : Operand { + let MIOperandInfo = (ops F64Op, F64Op, F64Op, F64Op); + let PrintMethod = "printV4F64"; +} + + +def Inst_Void : HSAILProfile<[UntypedOp, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_S32_S32 : HSAILProfile<[S32Op, S32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_S32_S32_S32 : HSAILProfile<[S32Op, S32Op, S32Op, UntypedOp, UntypedOp]>; +def Inst_S32_S32_S32_S32 : HSAILProfile<[S32Op, S32Op, S32Op, S32Op, UntypedOp]>; + +def Inst_S64_S64 : HSAILProfile<[S64Op, S64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_S64_S64_S64 : HSAILProfile<[S64Op, S64Op, S64Op, UntypedOp, UntypedOp]>; +def Inst_S64_S64_S64_S64 : HSAILProfile<[S64Op, S64Op, S64Op, S64Op, UntypedOp]>; + +def Inst_U32 : HSAILProfile<[U32Op, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_U32 : HSAILProfile<[U32Op, U32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_U32_U32 : HSAILProfile<[U32Op, U32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_U32_U32_U32_U32 : HSAILProfile<[U32Op, U32Op, U32Op, U32Op, UntypedOp]>; + +def Inst_U64 : HSAILProfile<[U64Op, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U64_U64 : HSAILProfile<[U64Op, U64Op, UntypedOp, UntypedOp, 
UntypedOp]>; +def Inst_U64_U64_U64 : HSAILProfile<[U64Op, U64Op, U64Op, UntypedOp, UntypedOp]>; +def Inst_U64_U64_U64_U64 : HSAILProfile<[U64Op, U64Op, U64Op, U64Op, UntypedOp]>; + +def Inst_B1_B1 : HSAILProfile<[B1Op, B1Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B1_B1_B1 : HSAILProfile<[B1Op, B1Op, B1Op, UntypedOp, UntypedOp]>; +def Inst_B1_B1_B1_B1 : HSAILProfile<[B1Op, B1Op, B1Op, B1Op, UntypedOp]>; + +def Inst_B32_B32 : HSAILProfile<[B32Op, B32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B32_B32_B32 : HSAILProfile<[B32Op, B32Op, B32Op, UntypedOp, UntypedOp]>; +def Inst_B32_B32_B32_B32 : HSAILProfile<[B32Op, B32Op, B32Op, B32Op, UntypedOp]>; + +def Inst_B64_B64 : HSAILProfile<[B64Op, B64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B64_B64_B64 : HSAILProfile<[B64Op, B64Op, B64Op, UntypedOp, UntypedOp]>; +def Inst_B64_B64_B64_B64 : HSAILProfile<[B64Op, B64Op, B64Op, B64Op, UntypedOp]>; + +def Inst_F32_F32 : HSAILProfile<[F32Op, F32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_F32_F32_F32 : HSAILProfile<[F32Op, F32Op, F32Op, UntypedOp, UntypedOp]>; +def Inst_F32_F32_F32_F32 : HSAILProfile<[F32Op, F32Op, F32Op, F32Op, UntypedOp]>; + +def Inst_F64_F64 : HSAILProfile<[F64Op, F64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_F64_F64_F64 : HSAILProfile<[F64Op, F64Op, F64Op, UntypedOp, UntypedOp]>; +def Inst_F64_F64_F64_F64 : HSAILProfile<[F64Op, F64Op, F64Op, F64Op, UntypedOp]>; + +def Inst_B32_B1_B32_B32 : HSAILProfile<[B32Op, B1Op, B32Op, B32Op, UntypedOp, UntypedOp]>; +def Inst_B64_B1_B64_B64 : HSAILProfile<[B64Op, B1Op, B64Op, B64Op, UntypedOp, UntypedOp]>; + +def Inst_F32_B1_F32_F32 : HSAILProfile<[F32Op, B1Op, F32Op, F32Op, UntypedOp, UntypedOp]>; +def Inst_F64_B1_F64_F64 : HSAILProfile<[F64Op, B1Op, F64Op, F64Op, UntypedOp, UntypedOp]>; + +def Inst_U8X4_U8X4_U8X4_U8X4 : HSAILProfile<[U8X4Op, U8X4Op, U8X4Op, U8X4Op, UntypedOp]>; +def Inst_B64_B64_B32_B64 : HSAILProfile<[B64Op, B64Op, B32Op, B64Op, UntypedOp]>; + +def Inst_S64_S64_U32_U32 : HSAILProfile<[S64Op, S64Op, U32Op, U32Op, UntypedOp]>; +def Inst_U64_U64_U32_U32 : HSAILProfile<[U64Op, U64Op, U32Op, U32Op, UntypedOp]>; +def Inst_S32_S32_U32_U32 : HSAILProfile<[S32Op, S32Op, U32Op, U32Op, UntypedOp]>; + +def Inst_S64_S64_U32 : HSAILProfile<[S64Op, S64Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_U64_U64_U32 : HSAILProfile<[U64Op, U64Op, U32Op, UntypedOp, UntypedOp]>; + +def Inst_F32_F32_U32 : HSAILProfile<[F32Op, F32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_F64_F64_U32 : HSAILProfile<[F64Op, F64Op, U32Op, UntypedOp, UntypedOp]>; + +def Inst_U32_B64 : HSAILProfile<[U32Op, B64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_B32 : HSAILProfile<[U32Op, B32Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_U32_S64 : HSAILProfile<[U32Op, S64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_U64 : HSAILProfile<[U32Op, U64Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U32_S32 : HSAILProfile<[U32Op, S32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_U64_U32 : HSAILProfile<[U64Op, U32Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_U32_B1 : HSAILProfile<[U32Op, B1Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B1_U32 : HSAILProfile<[B1Op, U32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B1_U64 : HSAILProfile<[B1Op, U64Op, UntypedOp, UntypedOp, UntypedOp]>; + + +def Inst_U8X4_U8X4_U32_U32 : HSAILProfile<[U8X4Op, U8X4Op, U32Op, U32Op, UntypedOp]>; +def Inst_S8X4_S8X4_S32_U32 : HSAILProfile<[S8X4Op, S8X4Op, S32Op, U32Op, UntypedOp]>; + +def Inst_U8X8_U8X8_U32_U32 : HSAILProfile<[U8X8Op, U8X8Op, U32Op, U32Op, 
UntypedOp]>; +def Inst_S8X8_S8X8_S32_U32 : HSAILProfile<[S8X8Op, S8X8Op, S32Op, U32Op, UntypedOp]>; + +def Inst_U16X4_U16X4_U32_U32 : HSAILProfile<[U16X4Op, U16X4Op, U32Op, U32Op, UntypedOp]>; +def Inst_S16X4_S16X4_S32_U32 : HSAILProfile<[S16X4Op, S16X4Op, S32Op, U32Op, UntypedOp]>; + +def Inst_U32X2_U32X2_U32_U32 : HSAILProfile<[U32X2Op, U32X2Op, U32Op, U32Op, UntypedOp]>; +def Inst_S32X2_S32X2_S32_U32 : HSAILProfile<[S32X2Op, S32X2Op, S32Op, U32Op, UntypedOp]>; + +def Inst_F16X2_F16X2_F16_U32 : HSAILProfile<[F16X2Op, F16X2Op, F16Op, U32Op, UntypedOp]>; +def Inst_F16X4_F16X4_F16_U32 : HSAILProfile<[F16X4Op, F16X4Op, F16Op, U32Op, UntypedOp]>; + + +def Inst_U8X4_F32_F32_F32_F32 : HSAILProfile<[U8X4Op, F32Op, F32Op, F32Op, F32Op]>; +def Inst_F32_U8X4_U32 : HSAILProfile<[F32Op, U8X4Op, U32Op, UntypedOp, UntypedOp]>; + +def Inst_U32_U16X2_U16X2_U32 : HSAILProfile<[U32Op, U16X2Op, U16X2Op, U32Op, UntypedOp]>; +def Inst_U32_U8X4_U8X4_U32 : HSAILProfile<[U32Op, U8X4Op, U8X4Op, U32Op, UntypedOp]>; +def Inst_U16X2_U8X4_U8X4_U16X2 : HSAILProfile<[U16X2Op, U8X4Op, U8X4Op, U16X2Op, UntypedOp]>; + +def Inst_B1_F32_U32 : HSAILProfile<[B1Op, F32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_B1_F64_U32 : HSAILProfile<[B1Op, F64Op, U32Op, UntypedOp, UntypedOp]>; + +def Inst_B1_B1_U32_B1_B1 : HSAILProfile<[B1Op, B1Op, U32Op, B1Op, B1Op]>; +def Inst_B32_B32_U32_B32_B1 : HSAILProfile<[B32Op, B32Op, U32Op, B32Op, B1Op]>; +def Inst_B64_B64_U32_B64_B1 : HSAILProfile<[B64Op, B64Op, U32Op, B64Op, B1Op]>; + + +// Compare profiles. +def Inst_B1_S32_S32 : HSAILProfile<[B1Op, S32Op, S32Op, UntypedOp, UntypedOp]>; +def Inst_B1_S64_S64 : HSAILProfile<[B1Op, S64Op, S64Op, UntypedOp, UntypedOp]>; + +def Inst_B1_U32_U32 : HSAILProfile<[B1Op, U32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_B1_U64_U64 : HSAILProfile<[B1Op, U64Op, U64Op, UntypedOp, UntypedOp]>; + +def Inst_B1_F32_F32 : HSAILProfile<[B1Op, F32Op, F32Op, UntypedOp, UntypedOp]>; +def Inst_B1_F64_F64 : HSAILProfile<[B1Op, F64Op, F64Op, UntypedOp, UntypedOp]>; + +// Atomic profiles. 
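+// (As with the profiles above, the name encodes the operand classes in order,
+// Inst_<Dest>_<Src0>_<Src1>..., with unused slots padded by UntypedOp; e.g.
+// Inst_Void_B32 below describes an operation with no destination and a single
+// b32 source, presumably for the return-less atomic forms.)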
+def Inst_Void_B32 : HSAILProfile<[UntypedOp, B32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_Void_B64 : HSAILProfile<[UntypedOp, B64Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_Void_S32 : HSAILProfile<[UntypedOp, S32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_Void_S64 : HSAILProfile<[UntypedOp, S64Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_Void_U32 : HSAILProfile<[UntypedOp, U32Op, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_Void_U64 : HSAILProfile<[UntypedOp, U64Op, UntypedOp, UntypedOp, UntypedOp]>; + +def Inst_B32 : HSAILProfile<[B32Op, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; +def Inst_B64 : HSAILProfile<[B64Op, UntypedOp, UntypedOp, UntypedOp, UntypedOp]>; + + +def Inst_U32_U32_U32_U32_U32 : HSAILProfile<[U32Op, U32Op, U32Op, U32Op, U32Op]>; +def Inst_U64_U64_U64_U32_U32 : HSAILProfile<[U64Op, U64Op, U64Op, U32Op, U32Op]>; + +def Inst_S32_S32_S32_U32_U32 : HSAILProfile<[S32Op, S32Op, S32Op, U32Op, U32Op]>; +def Inst_S64_S64_S64_U32_U32 : HSAILProfile<[S64Op, S64Op, S64Op, U32Op, U32Op]>; + +def Inst_B32_U32_U32 : HSAILProfile<[B32Op, U32Op, U32Op, UntypedOp, UntypedOp]>; +def Inst_B64_U32_U32 : HSAILProfile<[B64Op, U32Op, U32Op, UntypedOp, UntypedOp]>; + + +//===----------------------------------------------------------------------===// +// Custom Selection DAG Type Profiles +//===----------------------------------------------------------------------===// +include "HSAILProfiles.td" + +//===----------------------------------------------------------------------===// +// Custom Selection DAG Nodes +//===----------------------------------------------------------------------===// +include "HSAILNodes.td" + +//===----------------------------------------------------------------------===// +// Custom Pattern DAG Nodes +//===----------------------------------------------------------------------===// +include "HSAILPatterns.td" + +//===----------------------------------------------------------------------===// +// Instruction format classes +//===----------------------------------------------------------------------===// +include "HSAILInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Intrinsics support +//===----------------------------------------------------------------------===// +include "HSAILIntrinsics.td" + +//===----------------------------------------------------------------------===// +// Instructions support +//===----------------------------------------------------------------------===// +include "HSAILInstructions.td" Index: lib/Target/HSAIL/HSAILInstructions.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILInstructions.td @@ -0,0 +1,56 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// All basic int and fp arithmetic instructions, shifts, bit manipulation, +// moves, cmoves and multimedia like unpack. +// If we have full support of multimedia we would need to extract latter into a +// separate file. 
+include "HSAILArithmetic.td" + +//////////////////////////////////////////////////////////////////////////////// +// All comparisons and testing, including class_f32|64 +include "HSAILComparisons.td" + +//////////////////////////////////////////////////////////////////////////////// +// All atomic operations +include "HSAILAtomics.td" + +//////////////////////////////////////////////////////////////////////////////// +// Special HSAIL operations like NDRange queries, barriers, syncs etc +include "HSAILSpecial.td" + +//////////////////////////////////////////////////////////////////////////////// +// All control transfer instructions including call, ret and branches +include "HSAILControlFlow.td" + +//////////////////////////////////////////////////////////////////////////////// +// All conversions including bitcasts resulting in plain moves +include "HSAILConversions.td" + +//////////////////////////////////////////////////////////////////////////////// +// All loads and stores including kernarg and arg operations, argscopes and +// params. Image and sampler parameter manipulation operations are also here. +// stof and ftos operations are here as well. +include "HSAILLoadStore.td" + +//////////////////////////////////////////////////////////////////////////////// +// All image operations except image parameters handling +include "HSAILImages.td" + +//////////////////////////////////////////////////////////////////////////////// +// Fused opcodes folding a complex dag into a single instruction or a short +// instruction sequence, like fma or rsqrt. +// This file is for optimization purposes. Its inclusion is not required for +// valid code generation. For example, rsqrt is defined twice - first time in +// the HSAILArithmetic.td to be used with intrinsic expansion and second time +// here, but with a pattern folding div and sqrt into a single instruction. +// For that reason HSAILFusion.td is always last, so no other pattern would +// accidentally refer any of its opcodes. +include "HSAILFusion.td" Index: lib/Target/HSAIL/HSAILIntrinsicInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILIntrinsicInfo.h @@ -0,0 +1,63 @@ +//===------------ HSAILIntrinsicInfo.h - HSAILIntrinsic Info ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the target intrinsic instructions to the code generator. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILINTRINSICINFO_H +#define LLVM_LIB_TARGET_HSAIL_HSAILINTRINSICINFO_H + +#include "llvm/IR/Intrinsics.h" +#include "llvm/Target/TargetIntrinsicInfo.h" + +namespace llvm { + +class Function; +class Module; +class Type; + +class HSAILTargetMachine; + +namespace HSAILIntrinsic { +enum ID { + last_non_HSAIL_intrinsic = Intrinsic::num_intrinsics - 1, +#define GET_INTRINSIC_ENUM_VALUES +#include "HSAILGenIntrinsics.inc" +#undef GET_INTRINSIC_ENUM_VALUES + , + num_HSAIL_intrinsics +}; +} + +//--------------------------------------------------------------------------- +/// +/// HSAILIntrinsicInfo - Interface to description of machine intrinsic set +/// +class HSAILIntrinsicInfo : public TargetIntrinsicInfo { +public: + HSAILIntrinsicInfo(HSAILTargetMachine *tm); + + std::string getName(unsigned IID, Type **Tys = nullptr, + unsigned numTys = 0) const override; + + unsigned lookupName(const char *Name, unsigned Len) const override; + + bool isOverloaded(unsigned IID) const override; + + Function *getDeclaration(Module *M, unsigned ID, Type **Tys = nullptr, + unsigned numTys = 0) const override; + + static bool isReadImage(llvm::HSAILIntrinsic::ID intr); + static bool isLoadImage(llvm::HSAILIntrinsic::ID intr); +}; + +} // End llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILIntrinsicInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILIntrinsicInfo.cpp @@ -0,0 +1,143 @@ +//===-- HSAILIntrinsicInfo.cpp - HSAIL Intrinsic Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file contains the HSAIL Implementation of the IntrinsicInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "HSAILIntrinsicInfo.h" +#include "HSAILTargetMachine.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Intrinsics.h" +using namespace llvm; + +#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +#include "HSAILGenIntrinsics.inc" +#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + +bool HSAILIntrinsicInfo::isReadImage(HSAILIntrinsic::ID intr) { + switch (intr) { + default: + return false; + + case HSAILIntrinsic::HSAIL_rd_imgf_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2da_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_2da_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_3d_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_3d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_2da_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_2da_s32: + case HSAILIntrinsic::HSAIL_rd_imgi_3d_f32: + case HSAILIntrinsic::HSAIL_rd_imgi_3d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_1d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_1d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_1da_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_1da_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_2d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_2d_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_2da_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_2da_s32: + case HSAILIntrinsic::HSAIL_rd_imgui_3d_f32: + case HSAILIntrinsic::HSAIL_rd_imgui_3d_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_2ddepth_s32: + case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_f32: + case HSAILIntrinsic::HSAIL_rd_imgf_2dadepth_s32: + return true; + } +} + +bool HSAILIntrinsicInfo::isLoadImage(HSAILIntrinsic::ID intr) { + switch (intr) { + default: + return false; + + case HSAILIntrinsic::HSAIL_ld_imgf_1d_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_1da_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_1db_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_2d_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_2da_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_3d_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_1d_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_1da_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_1db_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_2d_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_2da_u32: + case HSAILIntrinsic::HSAIL_ld_imgi_3d_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_1d_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_1da_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_1db_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_2d_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_2da_u32: + case HSAILIntrinsic::HSAIL_ld_imgui_3d_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_2ddepth_u32: + case HSAILIntrinsic::HSAIL_ld_imgf_2dadepth_u32: + return true; + } +} + +HSAILIntrinsicInfo::HSAILIntrinsicInfo(HSAILTargetMachine *tm) + : TargetIntrinsicInfo() {} + +std::string HSAILIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, + unsigned int numTys) const { + static const char *const names[] = { +#define GET_INTRINSIC_NAME_TABLE +#include "HSAILGenIntrinsics.inc" +#undef GET_INTRINSIC_NAME_TABLE + }; + + if (IntrID < Intrinsic::num_intrinsics) { 
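+    // IDs below Intrinsic::num_intrinsics are generic LLVM intrinsics, which
+    // are not in the HSAIL name table; HSAIL IDs start right after them (see
+    // HSAILIntrinsic::ID), which is why the table index below subtracts
+    // Intrinsic::num_intrinsics.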
+ return 0; + } + assert(IntrID < HSAILIntrinsic::num_HSAIL_intrinsics && + "Invalid intrinsic ID"); + + std::string Result(names[IntrID - Intrinsic::num_intrinsics]); + return Result; +} + +unsigned HSAILIntrinsicInfo::lookupName(const char *Name, + unsigned Len) const { +#define GET_FUNCTION_RECOGNIZER +#include "HSAILGenIntrinsics.inc" +#undef GET_FUNCTION_RECOGNIZER + return getIntrinsicForGCCBuiltin("HSAIL", Name); +} + +bool HSAILIntrinsicInfo::isOverloaded(unsigned IntrID) const { + if (!IntrID) + return false; + + unsigned id = IntrID - Intrinsic::num_intrinsics + 1; +#define GET_INTRINSIC_OVERLOAD_TABLE +#include "HSAILGenIntrinsics.inc" +#undef GET_INTRINSIC_OVERLOAD_TABLE +} + +Function *HSAILIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + Type **Tys, + unsigned int numTys) const { + llvm_unreachable("Not implemented"); + + return nullptr; +} Index: lib/Target/HSAIL/HSAILIntrinsics.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILIntrinsics.td @@ -0,0 +1,1321 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the hsail-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "HSAIL", isTarget = 1 in { + +// HSAIL intrinsics +def int_HSAIL_workitemid_flat : GCCBuiltin<"__hsail_workitemid_flat">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_workitemid_flatabs : GCCBuiltin<"__hsail_workitemid_flatabs">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_lane_id : GCCBuiltin<"__hsail_get_lane_id">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_dynwave_id : GCCBuiltin<"__hsail_get_dynwave_id">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_maxdynwave_id : GCCBuiltin<"__hsail_get_maxdynwave_id">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_clock : GCCBuiltin<"__hsail_get_clock">, + Intrinsic<[llvm_i64_ty], [], [] +>; + +def int_HSAIL_get_cu : GCCBuiltin<"__hsail_get_cu">, + Intrinsic<[llvm_i32_ty], [], [] +>; + +// HSAIL Cross-Lane Intrinsics +// For OCL 2.0 1/32/64-bit built-ins +def int_HSAIL_activelaneid_u32 : GCCBuiltin<"__hsail_activelaneid_u32">, + Intrinsic<[llvm_i32_ty], [], [] +>; + +def int_HSAIL_activelaneid_width_u32 : + GCCBuiltin<"__hsail_activelaneid_wavewidth_u32">, + Intrinsic<[llvm_i32_ty], [], [] +>; + +def int_HSAIL_activelanecount_u32_b1 : + GCCBuiltin<"__hsail_activelanecount_u32_b1">, + Intrinsic<[llvm_i32_ty], [llvm_i1_ty], [] +>; + +def int_HSAIL_activelanecount_width_u32_b1 : + GCCBuiltin<"__hsail_activelanecount_wavewidth_u32_b1">, + Intrinsic<[llvm_i32_ty], [llvm_i1_ty], [] +>; + +def int_HSAIL_activelanepermute_b32 : + GCCBuiltin<"__hsail_activelanepermute_b32">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [] +>; + +def int_HSAIL_activelanepermute_width_b32 : + GCCBuiltin<"__hsail_activelanepermute_wavewidth_b32">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i1_ty], [] +>; + +def int_HSAIL_activelanemask_v4_b64_b1 : + GCCBuiltin<"__hsail_activelanemask_v4_b64_b1">, + Intrinsic<[llvm_i64_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + 
[llvm_i1_ty], [] +>; + +def int_HSAIL_activelanemask_v4_width_b64_b1 : + GCCBuiltin<"__hsail_activelanemask_v4_wavewidth_b64_b1">, + Intrinsic<[llvm_i64_ty, llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + [llvm_i1_ty], [] +>; + + +// For OCL 2.0 64-bit built-ins + +def int_HSAIL_activelanepermute_b64 : + GCCBuiltin< "__hsail_activelanepermute_b64">, + Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i1_ty], [] +>; + +def int_HSAIL_activelanepermute_width_b64 : + GCCBuiltin<"__hsail_activelanepermute_wavewidth_b64">, + Intrinsic<[llvm_i64_ty], + [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty, llvm_i1_ty], [] +>; + +// Intrinsics for OpenCL workitem built-ins (OCL 1.2 6.12.2) +def int_HSAIL_get_work_dim : GCCBuiltin<"__hsail_get_work_dim">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_get_global_id : GCCBuiltin<"__hsail_get_global_id">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_get_group_id : GCCBuiltin<"__hsail_get_group_id">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_get_local_id : GCCBuiltin<"__hsail_get_local_id">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_workgroup_size : GCCBuiltin<"__hsail_workgroup_size">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_currentworkgroup_size : + GCCBuiltin<"__hsail_currentworkgroup_size">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_get_global_size : GCCBuiltin<"__hsail_get_global_size">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_get_num_groups : GCCBuiltin<"__hsail_get_num_groups">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +// Intrinsics for OpenCL math built-ins (OCL 1.2 6.12.2) +def int_HSAIL_copysign_f32 : GCCBuiltin<"__hsail_copysign_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_copysign_f64 : GCCBuiltin<"__hsail_copysign_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_ncos_f32 : GCCBuiltin<"__hsail_ncos_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nexp2_f32 : GCCBuiltin<"__hsail_nexp2_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_abs_f32 : GCCBuiltin<"__hsail_abs_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_abs_f64 : GCCBuiltin<"__hsail_abs_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_fma_f32 : GCCBuiltin<"__hsail_fma_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; +def int_HSAIL_fma_f64 : GCCBuiltin<"__hsail_fma_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], [IntrNoMem]>; + +def int_HSAIL_nfma_f32 : GCCBuiltin<"__hsail_nfma_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nfma_f64 : GCCBuiltin<"__hsail_nfma_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_nlog2_f32 : GCCBuiltin<"__hsail_nlog2_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nsin_f32 : GCCBuiltin<"__hsail_nsin_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nsqrt_f32 : GCCBuiltin<"__hsail_nsqrt_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def 
int_HSAIL_nsqrt_f64 : GCCBuiltin<"__hsail_nsqrt_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_sqrt_ftz_f32 : GCCBuiltin<"__hsail_sqrt_ftz_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_sqrt_f64 : GCCBuiltin<"__hsail_sqrt_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_nrsqrt_f32 : GCCBuiltin<"__hsail_nrsqrt_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nrsqrt_f64 : GCCBuiltin<"__hsail_nrsqrt_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_nrcp_f32 : GCCBuiltin<"__hsail_nrcp_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_nrcp_f64 : GCCBuiltin<"__hsail_nrcp_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_ceil_f32 : GCCBuiltin<"__hsail_ceil_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_ceil_f64 : GCCBuiltin<"__hsail_ceil_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_floor_f32 : GCCBuiltin<"__hsail_floor_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_floor_f64 : GCCBuiltin<"__hsail_floor_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_rnd_f32 : GCCBuiltin<"__hsail_round_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_rnd_f64 : GCCBuiltin<"__hsail_round_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_trunc_f32 : GCCBuiltin<"__hsail_trunc_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_trunc_f64 : GCCBuiltin<"__hsail_trunc_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +// Intrinsics for OpenCL integer built-ins (OCL 1.2 6.12.3) +def int_HSAIL_max_u32 : GCCBuiltin<"__hsail_max_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_max_s32 : GCCBuiltin<"__hsail_max_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_max_u64 : GCCBuiltin<"__hsail_max_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_max_s64 : GCCBuiltin<"__hsail_max_s64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_max_f32 : GCCBuiltin<"__hsail_max_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_max_f64 : GCCBuiltin<"__hsail_max_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_max_f32 : GCCBuiltin<"__gcn_max_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_max_f64 : GCCBuiltin<"__gcn_max_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +// FIXME: min / max intrinsics should be removed. 
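+// (Presumably in favor of the generic llvm.minnum/llvm.maxnum intrinsics and
+// ordinary compare-and-select patterns, which the backend could pattern-match
+// to the same HSAIL min/max instructions.)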
+def int_HSAIL_min_u32 : GCCBuiltin<"__hsail_min_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_min_s32 : GCCBuiltin<"__hsail_min_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_min_u64 : GCCBuiltin<"__hsail_min_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_min_s64 : GCCBuiltin<"__hsail_min_s64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_min_f32 : GCCBuiltin<"__hsail_min_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_min_f64 : GCCBuiltin<"__hsail_min_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_min_f32 : GCCBuiltin<"__gcn_min_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_min_f64 : GCCBuiltin<"__gcn_min_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_div_f32 : GCCBuiltin<"__hsail_div_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +// Intrinsics for OpenCL relational built-ins (OCL 1.2 6.12.6) +def int_HSAIL_class_f32 : GCCBuiltin<"__hsail_class_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_class_f64 : GCCBuiltin<"__hsail_class_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty, llvm_i32_ty], [IntrNoMem] +>; + +// Intrinsics for OpenCL synchronization built-ins (OCL 1.2 6.12.8) +def int_HSAIL_barrier : GCCBuiltin<"__hsail_barrier">, + Intrinsic<[], [], [] +>; + +def int_HSAIL_wavebarrier : GCCBuiltin<"__hsail_wavebarrier">, + Intrinsic<[], [], [] +>; + +def int_HSAIL_memfence : GCCBuiltin<"__hsail_memfence">, + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [] +>; + +// Instrinsics for explicit conversions +// float to int +def int_HSAIL_cvt_s32_neari_f32 : GCCBuiltin<"__cvt_s32_rte_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_downi_f32 : GCCBuiltin<"__cvt_s32_rtn_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_upi_f32 : GCCBuiltin<"__cvt_s32_rtp_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_zeroi_f32 : GCCBuiltin<"__cvt_s32_rtz_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + + +// float to unsigned int +def int_HSAIL_cvt_u32_neari_f32 : GCCBuiltin<"__cvt_u32_rte_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_downi_f32 : GCCBuiltin<"__cvt_u32_rtn_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_upi_f32 : GCCBuiltin<"__cvt_u32_rtp_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_zeroi_f32 : GCCBuiltin<"__cvt_u32_rtz_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem] +>; + + +// float to long +def int_HSAIL_cvt_s64_neari_f32 : GCCBuiltin<"__cvt_s64_rte_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_downi_f32 : GCCBuiltin<"__cvt_s64_rtn_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_upi_f32 : GCCBuiltin<"__cvt_s64_rtp_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_zeroi_f32 : GCCBuiltin<"__cvt_s64_rtz_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + 
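+// (Naming note for this section: the builtin suffixes follow the OpenCL
+// rounding modes, rte = round to nearest even, rtn = toward negative infinity,
+// rtp = toward positive infinity, rtz = toward zero, and correspond to the
+// HSAIL cvt rounding modifiers neari, downi, upi and zeroi used in the
+// intrinsic names.)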
+ +// float to unsigned long +def int_HSAIL_cvt_u64_neari_f32 : GCCBuiltin<"__cvt_u64_rte_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_downi_f32 : GCCBuiltin<"__cvt_u64_rtn_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_upi_f32 : GCCBuiltin<"__cvt_u64_rtp_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_zeroi_f32 : GCCBuiltin<"__cvt_u64_rtz_f32">, + Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem] +>; + + +// double to int +def int_HSAIL_cvt_s32_neari_f64 : GCCBuiltin<"__cvt_s32_rte_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_downi_f64 : GCCBuiltin<"__cvt_s32_rtn_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_upi_f64 : GCCBuiltin<"__cvt_s32_rtp_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s32_zeroi_f64 : GCCBuiltin<"__cvt_s32_rtz_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// double to unsigned int +def int_HSAIL_cvt_u32_neari_f64 : GCCBuiltin<"__cvt_u32_rte_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_downi_f64 : GCCBuiltin<"__cvt_u32_rtn_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_upi_f64 : GCCBuiltin<"__cvt_u32_rtp_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u32_zeroi_f64 : GCCBuiltin<"__cvt_u32_rtz_f64">, + Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// double to long +def int_HSAIL_cvt_s64_neari_f64 : GCCBuiltin<"__cvt_s64_rte_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_downi_f64 : GCCBuiltin<"__cvt_s64_rtn_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_upi_f64 : GCCBuiltin<"__cvt_s64_rtp_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_s64_zeroi_f64 : GCCBuiltin<"__cvt_s64_rtz_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// double to unsigned long +def int_HSAIL_cvt_u64_neari_f64 : GCCBuiltin<"__cvt_u64_rte_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_downi_f64 : GCCBuiltin<"__cvt_u64_rtn_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_upi_f64 : GCCBuiltin<"__cvt_u64_rtp_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_u64_zeroi_f64 : GCCBuiltin<"__cvt_u64_rtz_f64">, + Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// int to float +def int_HSAIL_cvt_f32_down_i32 : GCCBuiltin<"__cvt_f32_rtn_i32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_i32 : GCCBuiltin<"__cvt_f32_rtp_i32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_i32 : GCCBuiltin<"__cvt_f32_rtz_i32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + + +// unsigned int to float +def int_HSAIL_cvt_f32_down_u32 : GCCBuiltin<"__cvt_f32_rtn_u32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_u32 : GCCBuiltin<"__cvt_f32_rtp_u32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_u32 : GCCBuiltin<"__cvt_f32_rtz_u32">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + + 
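+// (Note that the integer-to-float groups only carry down/up/zero variants,
+// presumably because round-to-nearest-even is the default behaviour of an
+// ordinary sitofp/uitofp, so no separate "near" intrinsic is needed.)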
+// long to float +def int_HSAIL_cvt_f32_down_i64 : GCCBuiltin<"__cvt_f32_rtn_i64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_i64 : GCCBuiltin<"__cvt_f32_rtp_i64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_i64 : GCCBuiltin<"__cvt_f32_rtz_i64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + + +// unsigned long to float +def int_HSAIL_cvt_f32_down_u64 : GCCBuiltin<"__cvt_f32_rtn_u64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_u64 : GCCBuiltin<"__cvt_f32_rtp_u64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_u64 : GCCBuiltin<"__cvt_f32_rtz_u64">, + Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem] +>; + + +// long to double +def int_HSAIL_cvt_f64_down_i64 : GCCBuiltin<"__cvt_f64_rtn_i64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f64_up_i64 : GCCBuiltin<"__cvt_f64_rtp_i64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f64_zero_i64 : GCCBuiltin<"__cvt_f64_rtz_i64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + + +// unsigned long to double +def int_HSAIL_cvt_f64_down_u64 : GCCBuiltin<"__cvt_f64_rtn_u64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f64_up_u64 : GCCBuiltin<"__cvt_f64_rtp_u64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f64_zero_u64 : GCCBuiltin<"__cvt_f64_rtz_u64">, + Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem] +>; + + +// double to float +def int_HSAIL_cvt_f32_down_f64 : GCCBuiltin<"__cvt_f32_rtn_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_up_f64 : GCCBuiltin<"__cvt_f32_rtp_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_f32_zero_f64 : GCCBuiltin<"__cvt_f32_rtz_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + + +// half to float +def int_HSAIL_cvt_f32_f16 : GCCBuiltin<"__cvt_f32_f16">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem] +>; + + +// float to half +def int_HSAIL_cvt_zero_f16_f32 : GCCBuiltin<"__cvt_f16_rtz_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_near_f16_f32 : GCCBuiltin<"__cvt_f16_rte_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_down_f16_f32 : GCCBuiltin<"__cvt_f16_rtn_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_up_f16_f32 : GCCBuiltin<"__cvt_f16_rtp_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +// double to half +def int_HSAIL_cvt_zero_f16_f64 : GCCBuiltin<"__cvt_f16_rtz_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_near_f16_f64 : GCCBuiltin<"__cvt_f16_rte_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_down_f16_f64 : GCCBuiltin<"__cvt_f16_rtn_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_cvt_up_f16_f64 : GCCBuiltin<"__cvt_f16_rtp_f64">, + Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem] +>; + +// Misc intrinsics used by OpenCL built-ins +def int_HSAIL_bitselect_u32 : GCCBuiltin<"__hsail_bitselect_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_bitselect_u64 : GCCBuiltin<"__hsail_bitselect_u64">, 
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +// Media-ops intrinsics +def int_HSAIL_bitalign_b32 : GCCBuiltin<"__hsail_bitalign_b32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_bytealign_b32 : GCCBuiltin<"__hsail_bytealign_b32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_packcvt_u8x4_f32 : GCCBuiltin<"__hsail_packcvt_u8x4_f32">, + Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_lerp_u8x4 : GCCBuiltin<"__hsail_lerp_u8x4">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_sad_u32_u8x4 : GCCBuiltin<"__hsail_sad_u32_u8x4">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_sadhi_u16x2_u8x4 : GCCBuiltin<"__hsail_sadhi_u16x2_u8x4">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_unpackcvt_f32_u8x4 : GCCBuiltin<"__hsail_unpackcvt_f32_u8x4">, + Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +// Media Ops2 + +def int_HSAIL_msad: GCCBuiltin<"__hsail_msad">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_sadw: GCCBuiltin<"__hsail_sadw">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_sadd: GCCBuiltin<"__hsail_sadd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_umin3: GCCBuiltin<"__hsail_umin3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_fmin3: GCCBuiltin<"__hsail_f32_min3">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_imin3: GCCBuiltin<"__hsail_imin3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_umax3: GCCBuiltin<"__hsail_umax3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_fmax3: GCCBuiltin<"__hsail_f32_max3">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_imax3: GCCBuiltin<"__hsail_imax3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_umed3: GCCBuiltin<"__hsail_umedian3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_fmed3: GCCBuiltin<"__hsail_f32_median3">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_imed3: GCCBuiltin<"__hsail_imedian3">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_qsad: GCCBuiltin<"__hsail_qsad">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_mqsad: GCCBuiltin<"__hsail_mqsad">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_bfe: GCCBuiltin<"__hsail_bfe">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_ibfe: GCCBuiltin<"__hsail_ibfe">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_bfm: GCCBuiltin<"__hsail_bfm">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], 
[IntrNoMem] +>; + +def int_HSAIL_ftz_f32 : GCCBuiltin<"__hsail_ftz_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_mul_ftz_f32 : GCCBuiltin<"__hsail_mul_ftz_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_firstbit_u32 : GCCBuiltin<"__hsail_firstbit_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_lastbit_u32 : GCCBuiltin<"__hsail_lastbit_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_fract_f32 : GCCBuiltin<"__hsail_fraction_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem] +>; + +def int_HSAIL_fract_f64 : GCCBuiltin<"__hsail_fraction_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem] +>; + +def int_HSAIL_mad_u32 : GCCBuiltin<"__hsail_mad_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mad_u64 : GCCBuiltin<"__hsail_mad_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_mulhi_s32 : GCCBuiltin<"__hsail_mulhi_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mulhi_u32 : GCCBuiltin<"__hsail_mulhi_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mulhi_s64 : GCCBuiltin<"__hsail_mulhi_s64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_mulhi_u64 : GCCBuiltin<"__hsail_mulhi_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem] +>; + +def int_HSAIL_mad24_s32 : GCCBuiltin<"__hsail_mad24_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mad24_u32 : GCCBuiltin<"__hsail_mad24_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mul24_s32 : GCCBuiltin<"__hsail_mul24_s32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_mul24_u32 : GCCBuiltin<"__hsail_mul24_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_popcount_u32_b32 : GCCBuiltin<"__hsail_popcount_u32_b32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_fldexp_f32 : GCCBuiltin<"__gcn_fldexp_f32">, + Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_gcn_fldexp_f64 : GCCBuiltin<"__gcn_fldexp_f64">, + Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_i32_ty], [IntrNoMem] +>; + +// atomic counter32 +// TODO: IntrReadWriteArgMem? 
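+// (IntrReadWriteArgMem would tell the optimizer these only access memory
+// through their pointer argument; with an empty property list they are treated
+// as having arbitrary side effects, which blocks CSE and reordering.)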
+def int_HSAIL_gcn_atomic_append_u32 : GCCBuiltin<"__gcn_atomic_append_u32">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [] +>; + +def int_HSAIL_gcn_atomic_consume_u32 : GCCBuiltin<"__gcn_atomic_consume_u32">, + Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [] +>; + +// Image intrinsics + +// Image read instrinsics +let TargetPrefix = "HSAIL", isTarget = 1 in { + // Read image intrinsic classes + class ReadImage1DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType], + [IntrReadArgMem, NoCapture<4>, NoCapture<5>] + >; + + class ReadImage2DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType, CoordType], + [IntrReadArgMem, NoCapture<4>, NoCapture<5>] + >; + + class ReadImage3DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType, CoordType, CoordType], + [IntrReadArgMem, NoCapture<4>, NoCapture<5>] + >; + + // OpenCL 2.0 image 2D Depth + class ReadImage2DDepthIntr : + GCCBuiltin, + Intrinsic<[DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType, CoordType], + [IntrReadArgMem, NoCapture<1>, NoCapture<2>] + >; + + // OpenCL 2.0 image 2D Array Depth + class ReadImage2DArrayDepthIntr : + GCCBuiltin, + Intrinsic<[DestType], + [llvm_ptr_ty, llvm_ptr_ty, CoordType, CoordType, CoordType], + [IntrReadArgMem, NoCapture<1>, NoCapture<2>] + >; + + // Load Image intrinsic classes + class LoadImage1DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, CoordType], + [IntrReadArgMem, NoCapture<4>] + >; + + class LoadImage2DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, CoordType, CoordType], + [IntrReadArgMem, NoCapture<4>] + >; + + class LoadImage3DIntr : + GCCBuiltin, + Intrinsic<[DestType, DestType, DestType, DestType], + [llvm_ptr_ty, CoordType, CoordType, CoordType], + [IntrReadArgMem, NoCapture<4>] + >; + + class LoadImage2DDepthIntr : + GCCBuiltin, + Intrinsic<[DestType], + [llvm_ptr_ty, CoordType, CoordType], + [IntrReadArgMem, NoCapture<1>] + >; + + class LoadImage2DArrayDepthIntr : + GCCBuiltin, + Intrinsic<[DestType], + [llvm_ptr_ty, CoordType, CoordType, CoordType], + [IntrReadArgMem, NoCapture<1>] + >; + + // Store image intrinsic classes + class StoreImage1dInt : + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty, llvm_i32_ty], [] + >; + + class StoreImage1dFloat : + Intrinsic<[], [llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_ptr_ty, llvm_i32_ty], [] + >; + + class StoreImage2dInt : + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [] + >; + + class StoreImage2dFloat : + Intrinsic<[], [llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_ptr_ty, llvm_i32_ty, + llvm_i32_ty], [] + >; + + class StoreImage3dInt : + Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, + llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] + >; + + class StoreImage3dFloat : + Intrinsic<[], [llvm_float_ty, llvm_float_ty, llvm_float_ty, + llvm_float_ty, llvm_ptr_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] + >; + + class StoreImage2dDepthFloat : + Intrinsic<[], [llvm_float_ty, llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], [] + >; + + class StoreImage2dArrayDepthFloat : + Intrinsic<[], [llvm_float_ty, llvm_ptr_ty, llvm_i32_ty, + llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [] + >; +} + +// read image 1d 
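+// (Each def below instantiates one of the classes above; e.g. a ReadImage1DIntr
+// returns the four texel components and takes two pointer arguments, presumably
+// the image and the sampler, plus one coordinate, and is marked IntrReadArgMem
+// since it only reads through its pointer arguments.)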
+def int_HSAIL_rd_imgf_1d_s32 : + ReadImage1DIntr<"__hsail_rdimagef_1d_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_1d_f32 : + ReadImage1DIntr<"__hsail_rdimagef_1d_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_1d_s32 : + ReadImage1DIntr<"__hsail_rdimagei_1d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_1d_f32 : + ReadImage1DIntr<"__hsail_rdimagei_1d_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_1d_s32 : + ReadImage1DIntr<"__hsail_rdimageui_1d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_1d_f32 : + ReadImage1DIntr<"__hsail_rdimageui_1d_f32", llvm_i32_ty, llvm_float_ty>; + +// read image 1d array +def int_HSAIL_rd_imgf_1da_s32 : + ReadImage2DIntr<"__hsail_rdimagef_1da_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_1da_f32 : + ReadImage2DIntr<"__hsail_rdimagef_1da_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_1da_s32 : + ReadImage2DIntr<"__hsail_rdimagei_1da_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_1da_f32 : + ReadImage2DIntr<"__hsail_rdimagei_1da_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_1da_s32 : + ReadImage2DIntr<"__hsail_rdimageui_1da_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_1da_f32 : + ReadImage2DIntr<"__hsail_rdimageui_1da_f32", llvm_i32_ty, llvm_float_ty>; + +// read image 2d +def int_HSAIL_rd_imgf_2d_s32 : + ReadImage2DIntr<"__hsail_rdimagef_2d_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_2d_f32 : + ReadImage2DIntr<"__hsail_rdimagef_2d_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_2d_s32 : + ReadImage2DIntr<"__hsail_rdimagei_2d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_2d_f32 : + ReadImage2DIntr<"__hsail_rdimagei_2d_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_2d_s32 : + ReadImage2DIntr<"__hsail_rdimageui_2d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_2d_f32 : + ReadImage2DIntr<"__hsail_rdimageui_2d_f32", llvm_i32_ty, llvm_float_ty>; + +// read image 2d array +def int_HSAIL_rd_imgf_2da_s32 : + ReadImage3DIntr<"__hsail_rdimagef_2da_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_2da_f32 : + ReadImage3DIntr<"__hsail_rdimagef_2da_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_2da_s32 : + ReadImage3DIntr<"__hsail_rdimagei_2da_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_2da_f32 : + ReadImage3DIntr<"__hsail_rdimagei_2da_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_2da_s32 : + ReadImage3DIntr<"__hsail_rdimageui_2da_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_2da_f32 : + ReadImage3DIntr<"__hsail_rdimageui_2da_f32", llvm_i32_ty, llvm_float_ty>; + +// read image 3d +def int_HSAIL_rd_imgf_3d_s32 : + ReadImage3DIntr<"__hsail_rdimagef_3d_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_3d_f32 : + ReadImage3DIntr<"__hsail_rdimagef_3d_f32", llvm_float_ty, llvm_float_ty>; +def int_HSAIL_rd_imgi_3d_s32 : + ReadImage3DIntr<"__hsail_rdimagei_3d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgi_3d_f32 : + ReadImage3DIntr<"__hsail_rdimagei_3d_f32", llvm_i32_ty, llvm_float_ty>; +def int_HSAIL_rd_imgui_3d_s32 : + ReadImage3DIntr<"__hsail_rdimageui_3d_s32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgui_3d_f32 : + ReadImage3DIntr<"__hsail_rdimageui_3d_f32", llvm_i32_ty, llvm_float_ty>; + +// OpenCL 2.0 read image 2ddepth +def int_HSAIL_rd_imgf_2ddepth_s32 : + ReadImage2DDepthIntr<"__hsail_rdimagef_2ddepth_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_2ddepth_f32 : + ReadImage2DDepthIntr<"__hsail_rdimagef_2ddepth_f32", 
llvm_float_ty, llvm_float_ty>; + +// OpenCL 2.0 read image 2dadepth +def int_HSAIL_rd_imgf_2dadepth_s32 : + ReadImage2DArrayDepthIntr<"__hsail_rdimagef_2dadepth_s32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_rd_imgf_2dadepth_f32 : + ReadImage2DArrayDepthIntr<"__hsail_rdimagef_2dadepth_f32", llvm_float_ty, llvm_float_ty>; + +// Load image intrinsics + +// load image 1d +def int_HSAIL_ld_imgf_1d_u32 : + LoadImage1DIntr<"__hsail_ldimagef_1d_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_1d_u32 : + LoadImage1DIntr<"__hsail_ldimagei_1d_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_1d_u32 : + LoadImage1DIntr<"__hsail_ldimageui_1d_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 1d buffer +def int_HSAIL_ld_imgf_1db_u32 : + LoadImage1DIntr<"__hsail_ldimagef_1db_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_1db_u32 : + LoadImage1DIntr<"__hsail_ldimagei_1db_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_1db_u32 : + LoadImage1DIntr<"__hsail_ldimageui_1db_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 1d array +def int_HSAIL_ld_imgf_1da_u32 : + LoadImage2DIntr<"__hsail_ldimagef_1da_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_1da_u32 : + LoadImage2DIntr<"__hsail_ldimagei_1da_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_1da_u32 : + LoadImage2DIntr<"__hsail_ldimageui_1da_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 2d +def int_HSAIL_ld_imgf_2d_u32 : + LoadImage2DIntr<"__hsail_ldimagef_2d_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_2d_u32 : + LoadImage2DIntr<"__hsail_ldimagei_2d_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_2d_u32 : + LoadImage2DIntr<"__hsail_ldimageui_2d_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 1d array +def int_HSAIL_ld_imgf_2da_u32 : + LoadImage3DIntr<"__hsail_ldimagef_2da_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_2da_u32 : + LoadImage3DIntr<"__hsail_ldimagei_2da_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_2da_u32 : + LoadImage3DIntr<"__hsail_ldimageui_2da_u32", llvm_i32_ty, llvm_i32_ty>; + +// load image 3d +def int_HSAIL_ld_imgf_3d_u32 : + LoadImage3DIntr<"__hsail_ldimagef_3d_u32", llvm_float_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgi_3d_u32 : + LoadImage3DIntr<"__hsail_ldimagei_3d_u32", llvm_i32_ty, llvm_i32_ty>; +def int_HSAIL_ld_imgui_3d_u32 : + LoadImage3DIntr<"__hsail_ldimageui_3d_u32", llvm_i32_ty, llvm_i32_ty>; + +// OpenCL 2.0 load image 2d depth +def int_HSAIL_ld_imgf_2ddepth_u32 : + LoadImage2DDepthIntr<"__hsail_ldimagef_2ddepth_u32", llvm_float_ty, llvm_i32_ty>; + +// OpenCL 2.0 load image 2dadepth +def int_HSAIL_ld_imgf_2dadepth_u32 : + LoadImage2DArrayDepthIntr<"__hsail_ldimagef_2dadepth_u32", llvm_float_ty, llvm_i32_ty>; + + +// Image store intrinsics +// store image 1d +def int_HSAIL_stimagef_1d_i32 : GCCBuiltin<"__hsail_stimagef_1d_i32">, + StoreImage1dFloat; +def int_HSAIL_stimagei_1d_i32 : GCCBuiltin<"__hsail_stimagei_1d_i32">, + StoreImage1dInt; +def int_HSAIL_stimageui_1d_i32 : GCCBuiltin<"__hsail_stimageui_1d_i32">, + StoreImage1dInt; + +// store image 1d array +def int_HSAIL_stimagef_1da_i32 : GCCBuiltin<"__hsail_stimagef_1da_i32">, + StoreImage2dFloat; +def int_HSAIL_stimagei_1da_i32 : GCCBuiltin<"__hsail_stimagei_1da_i32">, + StoreImage2dInt; +def int_HSAIL_stimageui_1da_i32 : GCCBuiltin<"__hsail_stimageui_1da_i32">, + StoreImage2dInt; + +// store image 1d buffer +def int_HSAIL_stimagef_1db_i32 : GCCBuiltin<"__hsail_stimagef_1db_i32">, + StoreImage1dFloat; +def int_HSAIL_stimagei_1db_i32 : 
GCCBuiltin<"__hsail_stimagei_1db_i32">, + StoreImage1dInt; +def int_HSAIL_stimageui_1db_i32 : GCCBuiltin<"__hsail_stimageui_1db_i32">, + StoreImage1dInt; + +// store image 2d +def int_HSAIL_stimagef_2d_i32 : GCCBuiltin<"__hsail_stimagef_2d_i32">, + StoreImage2dFloat; +def int_HSAIL_stimagei_2d_i32 : GCCBuiltin<"__hsail_stimagei_2d_i32">, + StoreImage2dInt; +def int_HSAIL_stimageui_2d_i32 : GCCBuiltin<"__hsail_stimageui_2d_i32">, + StoreImage2dInt; + +// store image 2d array +def int_HSAIL_stimagef_2da_i32 : GCCBuiltin<"__hsail_stimagef_2da_i32">, + StoreImage3dFloat; +def int_HSAIL_stimagei_2da_i32 : GCCBuiltin<"__hsail_stimagei_2da_i32">, + StoreImage3dInt; +def int_HSAIL_stimageui_2da_i32 : GCCBuiltin<"__hsail_stimageui_2da_i32">, + StoreImage3dInt; + +// store image 3d +def int_HSAIL_stimagef_3d_i32 : GCCBuiltin<"__hsail_stimagef_3d_i32">, + StoreImage3dFloat; +def int_HSAIL_stimagei_3d_i32 : GCCBuiltin<"__hsail_stimagei_3d_i32">, + StoreImage3dInt; +def int_HSAIL_stimageui_3d_i32 : GCCBuiltin<"__hsail_stimageui_3d_i32">, + StoreImage3dInt; + +// store image 2d depth +def int_HSAIL_stimagef_2ddepth_i32 : GCCBuiltin<"__hsail_stimagef_2ddepth_i32">, + StoreImage2dDepthFloat; + +// store image 2d array depth +def int_HSAIL_stimagef_2dadepth_i32 : GCCBuiltin<"__hsail_stimagef_2dadepth_i32">, + StoreImage2dArrayDepthFloat; + +// Image query +def int_HSAIL_query_width_1d : GCCBuiltin<"__hsail_query_width_1d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_1db : GCCBuiltin<"__hsail_query_width_1db">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_1da : GCCBuiltin<"__hsail_query_width_1da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_2d : GCCBuiltin<"__hsail_query_width_2d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_2da : GCCBuiltin<"__hsail_query_width_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_3d : GCCBuiltin<"__hsail_query_width_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_2ddepth : GCCBuiltin<"__hsail_query_width_2ddepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_width_2dadepth : GCCBuiltin<"__hsail_query_width_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_2d : GCCBuiltin<"__hsail_query_height_2d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_2da : GCCBuiltin<"__hsail_query_height_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_3d : GCCBuiltin<"__hsail_query_height_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_2ddepth : GCCBuiltin<"__hsail_query_height_2ddepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_height_2dadepth : GCCBuiltin<"__hsail_query_height_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_depth_3d : GCCBuiltin<"__hsail_depth_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_1d : GCCBuiltin<"__hsail_query_format_1d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_1db : GCCBuiltin<"__hsail_query_format_1db">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_1da : 
GCCBuiltin<"__hsail_query_format_1da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_2d : GCCBuiltin<"__hsail_query_format_2d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_2da : GCCBuiltin<"__hsail_query_format_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_format_3d : GCCBuiltin<"__hsail_query_format_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_1d : GCCBuiltin<"__hsail_query_order_1d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_1db : GCCBuiltin<"__hsail_query_order_1db">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_1da : GCCBuiltin<"__hsail_query_order_1da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_2d : GCCBuiltin<"__hsail_query_order_2d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_2da : GCCBuiltin<"__hsail_query_order_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_order_3d : GCCBuiltin<"__hsail_query_order_3d">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_array_1da : GCCBuiltin<"__hsail_query_array_1da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_array_2da : GCCBuiltin<"__hsail_query_array_2da">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_array_2dadepth : GCCBuiltin<"__hsail_query_array_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_channelorder_2ddepth : GCCBuiltin<"__hsail_query_channelorder_2ddepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_channelorder_2dadepth : GCCBuiltin<"__hsail_query_channelorder_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_channeltype_2ddepth : GCCBuiltin<"__hsail_query_channeltype_2ddepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_query_channeltype_2dadepth : GCCBuiltin<"__hsail_query_channeltype_2dadepth">, + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [] +>; + +def int_HSAIL_segmentp_global : GCCBuiltin<"__hsail_segmentp_global">, + Intrinsic<[llvm_i1_ty], [llvm_anyptr_ty], [IntrNoMem] +>; + +def int_HSAIL_segmentp_local : GCCBuiltin<"__hsail_segmentp_local">, + Intrinsic<[llvm_i1_ty], [llvm_anyptr_ty], [IntrNoMem] +>; + +def int_HSAIL_segmentp_private : GCCBuiltin<"__hsail_segmentp_private">, + Intrinsic<[llvm_i1_ty], [llvm_anyptr_ty], [IntrNoMem] +>; + +def int_HSAIL_nullptr_flat : GCCBuiltin<"__hsail_nullptr_flat">, + Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem] +>; + +// FIXME: Should this be removed? It produces the same output as for flat. +def int_HSAIL_nullptr_global : GCCBuiltin<"__hsail_nullptr_global">, + Intrinsic<[llvm_anyptr_ty],[], [IntrNoMem] +>; + +def int_HSAIL_nullptr_group : GCCBuiltin<"__hsail_nullptr_group">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +def int_HSAIL_nullptr_private : GCCBuiltin<"__hsail_nullptr_private">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem] +>; + +// FIXME: Should this be removed? It produces the same output as for flat. 
+def int_HSAIL_nullptr_readonly : GCCBuiltin<"__hsail_nullptr_readonly">, + Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem] +>; + +def int_HSAIL_nullptr_kernarg : GCCBuiltin<"__hsail_nullptr_kernarg">, + Intrinsic<[llvm_anyptr_ty], [], [IntrNoMem] +>; + +// ld_kernarg instructions have no side effects and can be CSE'd or +// even deleted if dead. +def int_HSAIL_ld_kernarg_u32 : GCCBuiltin<"__hsail_ld_kernarg_u32">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem] +>; + +def int_HSAIL_ld_kernarg_u64 : GCCBuiltin<"__hsail_ld_kernarg_u64">, + Intrinsic<[llvm_i64_ty], [llvm_i32_ty], [IntrNoMem] +>; + +} Index: lib/Target/HSAIL/HSAILKernel.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILKernel.h @@ -0,0 +1,124 @@ +//===-- HSAILKernel.h - HSAIL Kernel Class ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Definition of a HSAILKernel object and the various subclasses that +/// are used. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILKERNEL_H +#define LLVM_LIB_TARGET_HSAIL_HSAILKERNEL_H + +#include "HSAIL.h" +#include "llvm/IR/Value.h" +#include "llvm/IR/Constant.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { +class HSAILSubtarget; +class HSAILTargetMachine; +/// structure that holds information for a single local/region address array +typedef struct _HSAILArrayMemRec { + uint32_t vecSize; // size of each vector + uint32_t offset; // offset into the memory section + uint32_t align; // alignment + bool isHW; // flag to specify if HW is used or SW is used + bool isRegion; // flag to specify if GDS is used or not +} HSAILArrayMem; + +/// structure that holds information about a constant address +/// space pointer that is a kernel argument +typedef struct _HSAILConstPtrRec { + const Value *base; + uint32_t size; + uint32_t offset; + uint32_t align; // alignment + uint32_t cbNum; // value of 0 means that it does not use hw CB + bool isArray; // flag to specify that this is an array + bool isArgument; // flag to specify that this is for a kernel argument + bool usesHardware; // flag to specify if hardware CB is used or not + std::string name; +} HSAILConstPtr; + +/// Structure that holds information for all local/region address +/// arrays in the kernel +typedef struct _HSAILLocalPrivateArgRec { + // SmallVector local; + std::string name; // Kernel Name +} HSAILLocalPrivateArg; + +/// Structure that holds information for each kernel argument +typedef struct _HSAILkernelArgRec { + uint32_t reqGroupSize[3]; // x,y,z sizes for group. + uint32_t reqRegionSize[3]; // x,y,z sizes for region. + SmallVector argInfo; // Information about arguments. + bool mHasRWG; // true if reqd_work_group_size is specified. + bool mHasRWR; // true if reqd_work_region_size is specified. + + _HSAILkernelArgRec() { + mHasRWG = false; + mHasRWR = false; + } +} HSAILKernelAttr; + +/// Holds information for each kernel. 
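+/// Sizes below are in bytes; the curHW* fields count only hardware-backed
+/// group/region memory, while curSize/curRSize also include the
+/// software-emulated portion.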
+struct HSAILKernel { + uint32_t curSize; // local memory, hardware + software emulated + uint32_t curRSize; // region memory, hardware + software emulated + uint32_t curHWSize; // hardware local memory + uint32_t curHWRSize; // hardware region memory + + bool mKernel; // true if this is a kernel + std::string mName; + HSAILKernelAttr *sgv; // kernel attributes + + // vector containing constant pointer information + SmallVector constPtr; + + // set that specifies the read-only images for the kernel + SmallSet readOnly; + + // set that specifies the write-only images for the kernel + SmallSet writeOnly; + + // set that specifies the read-write images for the kernel + SmallSet readWrite; + + // set that specifies the access type qulifiers for the kernel arguments + std::vector accessTypeQualifer; + + // Vector of constant pool offsets + SmallVector, 8> CPOffsets; + + // Vector of kernel argument type names + std::vector ArgTypeNames; + + // Fields required for device enqueue. + bool EnqueuesKernel; // true if enqueues a kernel. + uint32_t KernelIndex; // positive value which deonotes the kernel index + + HSAILKernel() { + curSize = 0; + curRSize = 0; + curHWSize = 0; + curHWRSize = 0; + + mKernel = false; + sgv = nullptr; + + EnqueuesKernel = false; + KernelIndex = -1; + } +}; +} // end llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILLoadStore.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILLoadStore.td @@ -0,0 +1,112 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// stof, ftos -- not matched, inserted in MachineInstr lowering +// 32 bit + + + +defm STOF : InstSegCvt_1Op_PtrTypes<"stof", BrigOpcode.STOF>; +defm FTOS : InstSegCvt_1Op_PtrTypes<"ftos", BrigOpcode.FTOS>; +defm SEGMENTP : InstSegCvt_1Op_Segmentp_Types<"segmentp", BrigOpcode.SEGMENTP>; + +class SegmentPPat : Pat< + (HSAILsegmentp (i32 timm:$segment), + (i1 timm:$nonull), + (srcTy.VT (GPROrImm srcTy.VT:$src0))), + (!cast("SEGMENTP_B1"#srcTy.InstName) $segment, $nonull, $src0, BrigType.B1, srcTy.BT) +>; + + +let Predicates = [LargeModel] in { + def : SegmentPPat; +} + +let Predicates = [SmallModel] in { + def : SegmentPPat; +} + +let isNotDuplicable = 1, hasCtrlDep = 1, hasSideEffects = 1 in { + def ARG_DECL : HSAILInst<(outs), (ins PtrRC:$symbol, BrigType:$TypeLength, ArraySize:$size, + ArgDeclAlignment:$alignment), + "${alignment}arg$TypeLength $symbol$size", []>; +} + +// FIXME: If the MEMOP isn't explicitly typed in output, counts as +// wrong number of operands. 
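+// LDPat matches a load of value type 'vt' through the LoadAddr complex
+// pattern and rewrites it to the given LD_* instruction, forwarding the
+// captured segment, alignment, type, width, and memory-modifier operands.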
+class LDPat : Pat < + (vt (ldnode (LoadAddr MEMOP:$address, + BrigSegment:$segment, + BrigAlignment:$align, + BrigType:$TypeLength, + BrigWidth:$width, + BrigMemoryModifierMask:$mask))), + (inst MEMOP:$address, $TypeLength, $segment, $align, $width, $mask) +>; + + +defm LD : LD_Types<"ld", BrigOpcode.LD>; + + +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; +def : LDPat; + +let hasSideEffects = 1, hasCtrlDep = 1 in { + // It is not safe to move ld_arg as it can be in an argscope + defm RARG_LD : LD_Types<"ld", BrigOpcode.LD>; +} + +class STPat : Pat < + (node (vt (GPROrImm vt:$src)), + (StoreAddr MEMOP:$address, + BrigSegment:$segment, + BrigAlignment:$alignment, + BrigType:$TypeLength)), + (inst $src, MEMOP:$address, $TypeLength, $segment, $alignment) +>; + +defm ST : ST_Types<"st", BrigOpcode.ST>; + +// TODO: Promote float stores to integers. +def : STPat; +def : STPat; +def : STPat; +def : STPat; +def : STPat; +def : STPat; + + +// We need pseudos to implement condition register spilling due to a +// limitation storeRegToStackSlot currently has where it assumes only +// 1 instruction is created for spilling. +let isPseudo = 1 in { + def SPILL_B1 : ST<"spill_b1", BrigOpcode.NOP, + (ins CR:$src, MEMOP:$address, + BrigType:$TypeLength, BrigSegment:$segment, + BrigAlignment:$align) + >; + + def RESTORE_B1 : LD<"restore_b1", BrigOpcode.NOP, + (outs CR:$dest) + >; +} + +//////////////////////////////////////////////////////////////////////////////// +// load memory address + +defm LDA : InstAddr_1Op_PtrTypes<"lda", BrigOpcode.LDA>; Index: lib/Target/HSAIL/HSAILMCInstLower.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMCInstLower.h @@ -0,0 +1,34 @@ +//===- HSAILMCInstLower.h MachineInstr Lowering Interface -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILMCINSTLOWER_H +#define LLVM_LIB_TARGET_HSAIL_HSAILMCINSTLOWER_H + +namespace llvm { + +class HSAILAsmPrinter; +class MachineInstr; +class MCContext; +class MCInst; + +class HSAILMCInstLower { + MCContext &Ctx; + const HSAILAsmPrinter &AP; + +public: + HSAILMCInstLower(MCContext &Ctx, const HSAILAsmPrinter &AP); + + /// \brief Lower a MachineInstr to an MCInst + void lower(const MachineInstr *MI, MCInst &OutMI) const; +}; + +} // End namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILMCInstLower.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMCInstLower.cpp @@ -0,0 +1,91 @@ +//===- HSAILMCInstLower.cpp - Lower HSAIL MachineInstr to an MCInst ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Code to lower HSAIL MachineInstrs to their corresponding MCInst. 
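+/// A usage sketch: HSAILAsmPrinter is expected to construct an
+/// HSAILMCInstLower with its MCContext and call lower() on each MachineInstr
+/// before handing the resulting MCInst to the streamer.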
+// +//===----------------------------------------------------------------------===// +// + +#include "HSAILMCInstLower.h" +#include "HSAIL.h" +#include "HSAILAsmPrinter.h" +#include "InstPrinter/HSAILInstPrinter.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +HSAILMCInstLower::HSAILMCInstLower(MCContext &ctx, const HSAILAsmPrinter &ap) + : Ctx(ctx), AP(ap) {} + +void HSAILMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { + + OutMI.setOpcode(MI->getOpcode()); + + for (const MachineOperand &MO : MI->explicit_operands()) { + MCOperand MCOp; + switch (MO.getType()) { + default: + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_FPImmediate: { + const APFloat &FloatValue = MO.getFPImm()->getValueAPF(); + + if (&FloatValue.getSemantics() == &APFloat::IEEEsingle) + MCOp = MCOperand::createFPImm(FloatValue.convertToFloat()); + else if (&FloatValue.getSemantics() == &APFloat::IEEEdouble) + MCOp = MCOperand::createFPImm(FloatValue.convertToDouble()); + else + llvm_unreachable("Unhandled floating point type"); + break; + } + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + case MachineOperand::MO_Register: + MCOp = MCOperand::createReg(MO.getReg()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createExpr( + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + + SmallString<256> Name; + AP.getHSAILMangledName(Name, GV); + + MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); + + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); + break; + } + case MachineOperand::MO_ExternalSymbol: { + MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine('%') + MO.getSymbolName()); + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); + break; + } + case MachineOperand::MO_MCSymbol: { + MCSymbol *Sym = MO.getMCSymbol(); + MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); + break; + } + case MachineOperand::MO_TargetIndex: { + llvm_unreachable("Don't know how to lower target index"); + break; + } + } + OutMI.addOperand(MCOp); + } +} Index: lib/Target/HSAIL/HSAILMachineFunctionInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMachineFunctionInfo.h @@ -0,0 +1,290 @@ +//==-- HSAILMachineFunctionInfo.h -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file declares HSAIL-specific per-machine-function information +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_HSAIL_HSAILMACHINEFUNCTIONINFO_H + +#include "HSAIL.h" +#include "HSAILKernel.h" +#include "HSAILParamManager.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/Function.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include +#include +#include + +namespace llvm { +class HSAILSubtarget; +class HSAILPrintfInfo { + uint32_t mPrintfID; + SmallVector mOperands; + +public: + void addOperand(size_t idx, uint32_t size); + uint32_t getPrintfID(); + void setPrintfID(uint32_t idx); + size_t getNumOperands(); + uint32_t getOperandID(uint32_t idx); +}; // class HSAILPrintfInfo + +enum NameDecorationStyle { NONE, StdCall, FastCall }; +typedef struct SamplerInfoRec { + std::string name; // The name of the sampler + uint32_t val; // The value of the sampler + uint32_t idx; // The sampler resource id +} SamplerInfo; +// Some typedefs that will help with using the various iterators +// of the machine function info class. +typedef StringMap::iterator sampler_iterator; +typedef DenseSet::iterator func_iterator; +typedef DenseSet::iterator intr_iterator; +typedef DenseSet::iterator sema_iterator; +typedef DenseSet::iterator read_image1d_iterator; +typedef DenseSet::iterator write_image1d_iterator; +typedef DenseSet::iterator read_image1d_array_iterator; +typedef DenseSet::iterator write_image1d_array_iterator; +typedef DenseSet::iterator read_image1d_buffer_iterator; +typedef DenseSet::iterator write_image1d_buffer_iterator; +typedef DenseSet::iterator read_image2d_iterator; +typedef DenseSet::iterator write_image2d_iterator; +typedef DenseSet::iterator read_image2d_array_iterator; +typedef DenseSet::iterator write_image2d_array_iterator; +typedef DenseSet::iterator read_image3d_iterator; +typedef DenseSet::iterator write_image3d_iterator; +typedef DenseSet::iterator read_ptr_iterator; +typedef DenseSet::iterator error_iterator; +typedef std::map::iterator printf_iterator; +typedef std::set::iterator func_md_iterator; +typedef std::vector::iterator kernel_md_iterator; +// HSAILMachineFunctionInfo - This class is +// derived from MachineFunction private +// hsail target-specific information for each MachineFunction +class HSAILMachineFunctionInfo : public MachineFunctionInfo { + // The size in bytes required to host all of the kernel arguments. + // -1 means this value has not been determined yet. + int32_t mArgSize; + + // The size in bytes required to host the stack and the kernel arguments + // in private memory. + // -1 means this value has not been determined yet. + int32_t mScratchSize; + + // The size in bytes required to host the the kernel arguments + // on the stack. + // -1 means this value has not been determined yet. + int32_t mStackSize; + + // The size in bytes required to host private variables + // -1 means this value has not been determined yet. + int32_t mPrivateMemSize; + + // The size in bytes required to host group variables + // -1 means this value has not been determined yet. 
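+ // (The scratch/private/group/stack sizes are computed lazily by the
+ // get*Size() accessors declared below.)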
+ int32_t mGroupMemSize; + + /// A map of constant to literal mapping for all of the 32bit or + /// smaller literals in the current function. + std::map mIntLits; + + /// A map of name to sampler information that is used to emit + /// metadata to the IL stream that the runtimes can use for + /// hardware setup. + StringMap mSamplerMap; + + /// Set of all functions that this function calls. + DenseSet mFuncs; + + /// Set of all intrinsics that this function calls. + DenseSet mIntrs; + + /// Set of all the raw uavs. + DenseSet mRawUAV; + + /// Set of all semaphores + DenseSet mSemaphore; + + /// Set of all the read-only pointers + DenseSet mReadPtr; + + /// A set of all errors that occured in the backend for this function. + DenseSet mErrors; + + /// A set of all of the metadata that is used for the current function. + std::set mMetadataFunc; + + /// A set of all of the metadata that is used for the function wrapper. + std::vector mMetadataKernel; + + SmallVector mArgRegs; + + /// A number of 64 bit register slots reserved for $s registers. + unsigned RegisterPartitioning; + + /// Information about the kernel, NULL if the function is not a kernel. + HSAILKernel *mKernel; + + /// Pointer to the machine function that this information belongs to. + MachineFunction *mMF; + + /// Pointer to the subtarget for this function. + const HSAILSubtarget *mSTM; + + bool HasSpilledCRs; + bool HasScavengerSpill; + +public: + explicit HSAILMachineFunctionInfo(MachineFunction &MF); + + // FIXME: Remove these + void setUsesLocal() {} + void setUsesRegion() {} + + bool usesHWConstant(std::string name) const; + bool isKernel() const; + HSAILKernel *getKernel(); + + /// Get the size in bytes that are required to host all of + /// arguments and stack memory in scratch. + uint32_t getScratchSize(); + + /// Get the size in bytes that are required to host all of + /// private memory in scratch. + size_t getPrivateSize(); + + /// Get the size in bytes that are required to host all of + /// group memory. + size_t getGroupSize(); + + /// Get the size in bytes that is required to host all of + /// the arguments on the stack. + uint32_t getStackSize(); + + /// + /// @param val value to add the lookup table + /// @param Opcode opcode of the literal instruction + /// @brief adds the specified value of the type represented by the + /// Opcode + /// to the literal to integer and integer to literal mappings. + /// + /// Add a 32bit integer value to the literal table. + // uint32_t addi32Literal(uint32_t val, int Opcode = HSAIL::LOADCONST_i32); + uint32_t addi32Literal(uint32_t val, int Opcode = 0); + + // Iterators that point to the beginning and end of the sampler map. + sampler_iterator sampler_begin() { return mSamplerMap.begin(); } + sampler_iterator sampler_end() { return mSamplerMap.end(); } + + /// Add called functions to the set of all functions this function calls. 
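+ /// The funcs() range below provides iteration over the recorded function IDs.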
+ void addCalledFunc(uint32_t id) { mFuncs.insert(id); } + void eraseCalledFunc(uint32_t id) { mFuncs.erase(id); } + size_t func_size() { return mFuncs.size(); } + bool func_empty() { return mFuncs.empty(); } + func_iterator func_begin() { return mFuncs.begin(); } + func_iterator func_end() { return mFuncs.end(); } + + inline iterator_range funcs() { + return iterator_range(func_begin(), func_end()); + } + + /// Add a semaphore + void sema_insert(uint32_t id) { mSemaphore.insert(id); } + bool sema_count(uint32_t id) { return mSemaphore.count(id); } + size_t sema_size() { return mSemaphore.size(); } + sema_iterator sema_begin() { return mSemaphore.begin(); } + sema_iterator sema_end() { return mSemaphore.end(); } + + /// Add a raw uav id. + void uav_insert(uint32_t id) { mRawUAV.insert(id); } + + /// Add a pointer to the known set of read-only pointers + void add_read_ptr(const Value *ptr) { mReadPtr.insert(ptr); } + bool read_ptr_count(const Value *ptr) { return mReadPtr.count(ptr); } + bool read_size() { return mReadPtr.size(); } + + // Add an error to the output for the current function. + typedef enum { + RELEASE_ONLY, /// Only emit error message in release mode. + DEBUG_ONLY, /// Only emit error message in debug mode. + ALWAYS /// Always emit the error message. + } ErrorMsgEnum; + + // FIXME: Remove these and use normal error reporting mechanism. + /// Add an error message to the set of all error messages. + void addErrorMsg(const char *msg, ErrorMsgEnum val = ALWAYS); + bool errors_empty() { return mErrors.empty(); } + error_iterator errors_begin() { return mErrors.begin(); } + error_iterator errors_end() { return mErrors.end(); } + + /// Add a string to the metadata set for a function/kernel wrapper + void addMetadata(const char *md, bool kernelOnly = false); + void addMetadata(std::string md, bool kernelOnly = false); + func_md_iterator func_md_begin() { return mMetadataFunc.begin(); } + func_md_iterator func_md_end() { return mMetadataFunc.end(); } + kernel_md_iterator kernel_md_begin() { return mMetadataKernel.begin(); } + kernel_md_iterator kernel_md_end() { return mMetadataKernel.end(); } + + /// Query to find out if we are a signed or unsigned integer type. + bool isSignedIntType(const Value *ptr); + + /// Query to find out if we are a volatile pointer. + bool isVolatilePointer(const Value *ptr); + + /// Query to find out if we are a restrict pointer. + bool isRestrictPointer(const Value *ptr); + + /// Query to find out if we are a constant argument. + bool isConstantArgument(const Value *ptr); + + /// add/retrieve the argument registers numbers + void addArgReg(unsigned arg) { mArgRegs.push_back(arg); } + unsigned getArgReg(unsigned arg) { + return (arg < mArgRegs.size()) ? 
mArgRegs[arg] : arg; + } + + void setRegisterPartitioning(unsigned RegSlots) { + RegisterPartitioning = RegSlots; + } + unsigned getRegisterPartitioning() const { return RegisterPartitioning; } + + HSAILParamManager &getParamManager() { return ParamManager; } + const HSAILParamManager &getParamManager() const { return ParamManager; } + + + bool hasSpilledCRs() const { + return HasSpilledCRs; + } + + void setHasSpilledCRs(bool Spill = true) { + HasSpilledCRs = Spill; + } + + bool hasScavengerSpill() const { + return HasScavengerSpill; + } + + void setHasScavengerSpill(bool Spill = true) { + HasScavengerSpill = Spill; + } + +private: + HSAILParamManager ParamManager; +}; +} // llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILMachineFunctionInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMachineFunctionInfo.cpp @@ -0,0 +1,386 @@ +//===-- HSAILMachineFunctionInfo.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILMachineFunctionInfo.h" +#include "HSAILModuleInfo.h" +#include "HSAILUtilityFunctions.h" +using namespace llvm; + +static const HSAILConstPtr *getConstPtr(const HSAILKernel *krnl, + const std::string &arg) { + if (!krnl) { + return nullptr; + } + + SmallVector::const_iterator begin, end; + for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end(); begin != end; + ++begin) { + if (!strcmp(begin->name.data(), arg.c_str())) { + return &(*begin); + } + } + return nullptr; +} + +void HSAILPrintfInfo::addOperand(size_t idx, uint32_t size) { + mOperands.resize((unsigned)(idx + 1)); + mOperands[(unsigned)idx] = size; +} + +uint32_t HSAILPrintfInfo::getPrintfID() { return mPrintfID; } + +void HSAILPrintfInfo::setPrintfID(uint32_t id) { mPrintfID = id; } + +size_t HSAILPrintfInfo::getNumOperands() { return mOperands.size(); } + +uint32_t HSAILPrintfInfo::getOperandID(uint32_t idx) { return mOperands[idx]; } + +HSAILMachineFunctionInfo::HSAILMachineFunctionInfo(MachineFunction &MF) + : RegisterPartitioning(0), + HasSpilledCRs(false), + HasScavengerSpill(false), + ParamManager(MF.getTarget().getDataLayout()) { + const Function *F = MF.getFunction(); + mMF = &MF; + MachineModuleInfo &mmi = MF.getMMI(); + const HSAILTargetMachine *TM = + reinterpret_cast(&MF.getTarget()); + HSAILModuleInfo *AMI = &(mmi.getObjFileInfo()); + AMI->processModule(mmi.getModule(), TM); + for (Module::const_iterator I = F->getParent()->begin(), + E = F->getParent()->end(); + I != E; ++I) { + // Map all the known names to a unique number + AMI->getOrCreateFunctionID(I->getName()); + } + mSTM = TM->getSubtargetImpl(); + mKernel = AMI->getKernel(F->getName()); + + mScratchSize = -1; + mPrivateMemSize = -1; + mGroupMemSize = -1; + mArgSize = -1; + mStackSize = -1; +} + +bool HSAILMachineFunctionInfo::usesHWConstant(std::string name) const { + const HSAILConstPtr *curConst = getConstPtr(mKernel, name); + if (curConst) { + return curConst->usesHardware; + } else { + return false; + } +} + +bool HSAILMachineFunctionInfo::isKernel() const { + return mKernel != nullptr && mKernel->mKernel; +} + +HSAILKernel *HSAILMachineFunctionInfo::getKernel() { return mKernel; } + +uint32_t HSAILMachineFunctionInfo::getScratchSize() { + const DataLayout *DL = mMF->getTarget().getDataLayout(); + + if 
(mScratchSize == -1) { + mScratchSize = 0; + Function::const_arg_iterator I = mMF->getFunction()->arg_begin(); + Function::const_arg_iterator Ie = mMF->getFunction()->arg_end(); + while (I != Ie) { + // FIXME: Mishandling byval structs + Type *curType = I->getType(); + mScratchSize += RoundUpToAlignment(DL->getTypeStoreSize(curType), 16); + ++I; + } + // mScratchSize += ((mScratchSize + 15) & ~15); // possible typo: doubling + // mScratchSize + } + return (uint32_t)mScratchSize; +} + +size_t HSAILMachineFunctionInfo::getPrivateSize() { + if (mPrivateMemSize == -1) { + const DataLayout *DL = mMF->getTarget().getDataLayout(); + + mPrivateMemSize = 0; + SmallPtrSet thisFuncPvtVarsSet; + for (MachineFunction::const_iterator I = mMF->begin(), E = mMF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *LastMI = II; + for (unsigned int opNum = 0; opNum < LastMI->getNumOperands(); + opNum++) { + const MachineOperand &MO = LastMI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_GlobalAddress) { + if (const GlobalVariable *GV = + dyn_cast(MO.getGlobal())) { + if (GV->getType()->getAddressSpace() == + HSAILAS::PRIVATE_ADDRESS) { + if (thisFuncPvtVarsSet.insert(GV).second) { + mPrivateMemSize += + DL->getTypeAllocSize(GV->getType()->getElementType()); + } + } + } + } + } + } + } + mPrivateMemSize = ((mPrivateMemSize + 15) & ~15); + } + return (uint32_t)mPrivateMemSize; +} + +size_t HSAILMachineFunctionInfo::getGroupSize() { + if (mGroupMemSize == -1) { + const DataLayout *DL = mMF->getTarget().getDataLayout(); + + mGroupMemSize = 0; + SmallPtrSet thisFuncGrpVarsSet; + for (MachineFunction::const_iterator I = mMF->begin(), E = mMF->end(); + I != E; ++I) { + for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); + II != IE; ++II) { + const MachineInstr *LastMI = II; + for (unsigned int opNum = 0; opNum < LastMI->getNumOperands(); + opNum++) { + const MachineOperand &MO = LastMI->getOperand(opNum); + if (MO.getType() == MachineOperand::MO_GlobalAddress) { + if (const GlobalVariable *GV = + dyn_cast(MO.getGlobal())) { + if (GV->getType()->getAddressSpace() == HSAILAS::GROUP_ADDRESS) { + if (thisFuncGrpVarsSet.insert(GV).second) { + mGroupMemSize += + DL->getTypeAllocSize(GV->getType()->getElementType()); + } + } + } + } + } + } + } + mGroupMemSize = ((mGroupMemSize + 15) & ~15); + } + return (uint32_t)mGroupMemSize; +} + +uint32_t HSAILMachineFunctionInfo::getStackSize() { + if (mStackSize == -1) { + uint32_t privSize = 0; + const MachineFrameInfo *MFI = mMF->getFrameInfo(); + privSize = MFI->getOffsetAdjustment() + MFI->getStackSize(); + const HSAILTargetMachine *TM = + reinterpret_cast(&mMF->getTarget()); + bool addStackSize = TM->getOptLevel() == CodeGenOpt::None; + Function::const_arg_iterator I = mMF->getFunction()->arg_begin(); + Function::const_arg_iterator Ie = mMF->getFunction()->arg_end(); + while (I != Ie) { + Type *curType = I->getType(); + ++I; + if (dyn_cast(curType)) { + Type *CT = dyn_cast(curType)->getElementType(); + if (CT->isStructTy() && + dyn_cast(curType)->getAddressSpace() == + HSAILAS::PRIVATE_ADDRESS) { + addStackSize = true; + } + } + } + if (addStackSize) { + privSize += getScratchSize(); + } + mStackSize = privSize; + } + return (uint32_t)mStackSize; +} + +// FIXME: Remove this +uint32_t HSAILMachineFunctionInfo::addi32Literal(uint32_t val, int Opcode) { + return mIntLits[val]; +} + +void HSAILMachineFunctionInfo::addErrorMsg(const char *msg, ErrorMsgEnum val) { 
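+ // Filter by build type: DEBUG_ONLY messages are kept only in debug builds,
+ // RELEASE_ONLY messages only in release builds, and ALWAYS unconditionally.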
+ if (val == DEBUG_ONLY) { +#if defined(DEBUG) || defined(_DEBUG) + mErrors.insert(msg); +#endif + } else if (val == RELEASE_ONLY) { +#if !defined(DEBUG) && !defined(_DEBUG) + mErrors.insert(msg); +#endif + } else if (val == ALWAYS) { + mErrors.insert(msg); + } +} + +void HSAILMachineFunctionInfo::addMetadata(const char *md, bool kernelOnly) { + addMetadata(std::string(md), kernelOnly); +} + +void HSAILMachineFunctionInfo::addMetadata(std::string md, bool kernelOnly) { + if (kernelOnly) { + mMetadataKernel.push_back(md); + } else { + mMetadataFunc.insert(md); + } +} + +bool HSAILMachineFunctionInfo::isSignedIntType(const Value *ptr) { + if (!mSTM->supportMetadata30()) + return true; + std::string signedNames = "llvm.signedOrSignedpointee.annotations."; + std::string argName = ptr->getName(); + if (!mMF) + return false; + signedNames += mMF->getFunction()->getName(); + const GlobalVariable *GV = + mMF->getFunction()->getParent()->getGlobalVariable(signedNames); + if (!GV || !GV->hasInitializer()) + return false; + const ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return false; + for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) { + const ConstantExpr *nameField = + dyn_cast(CA->getOperand(start)); + if (!nameField) + continue; + + const GlobalVariable *nameGV = + dyn_cast(nameField->getOperand(0)); + if (!nameGV || !nameGV->hasInitializer()) + continue; + + const ConstantDataSequential *nameArray = + dyn_cast(nameGV->getInitializer()); + if (!nameArray) + continue; + + std::string nameStr = nameArray->getAsString(); + // We don't want to include the newline + if (!nameStr.compare(0, nameStr.length() - 1, argName)) + return true; + } + return false; +} +bool HSAILMachineFunctionInfo::isVolatilePointer(const Value *ptr) { + if (!mSTM->supportMetadata30()) + return false; + std::string signedNames = "llvm.volatilepointer.annotations."; + std::string argName = ptr->getName(); + if (!mMF) + return false; + signedNames += mMF->getFunction()->getName(); + const GlobalVariable *GV = + mMF->getFunction()->getParent()->getGlobalVariable(signedNames); + if (!GV || !GV->hasInitializer()) + return false; + const ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return false; + for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) { + const ConstantExpr *nameField = + dyn_cast(CA->getOperand(start)); + if (!nameField) + continue; + + const GlobalVariable *nameGV = + dyn_cast(nameField->getOperand(0)); + if (!nameGV || !nameGV->hasInitializer()) + continue; + + const ConstantDataSequential *nameArray = + dyn_cast(nameGV->getInitializer()); + if (!nameArray) + continue; + + std::string nameStr = nameArray->getAsString(); + // We don't want to include the newline + if (!nameStr.compare(0, nameStr.length() - 1, argName)) + return true; + } + return false; +} +bool HSAILMachineFunctionInfo::isRestrictPointer(const Value *ptr) { + if (!mSTM->supportMetadata30()) + return false; + std::string signedNames = "llvm.restrictpointer.annotations."; + std::string argName = ptr->getName(); + if (!mMF) + return false; + signedNames += mMF->getFunction()->getName(); + const GlobalVariable *GV = + mMF->getFunction()->getParent()->getGlobalVariable(signedNames); + if (!GV || !GV->hasInitializer()) + return false; + const ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return false; + for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) { + const ConstantExpr *nameField = + 
dyn_cast(CA->getOperand(start)); + if (!nameField) + continue; + + const GlobalVariable *nameGV = + dyn_cast(nameField->getOperand(0)); + if (!nameGV || !nameGV->hasInitializer()) + continue; + + const ConstantDataSequential *nameArray = + dyn_cast(nameGV->getInitializer()); + if (!nameArray) + continue; + + std::string nameStr = nameArray->getAsString(); + // We don't want to include the newline + if (!nameStr.compare(0, nameStr.length() - 1, argName)) + return true; + } + return false; +} + +bool HSAILMachineFunctionInfo::isConstantArgument(const Value *ptr) { + if (!mSTM->supportMetadata30()) + return false; + std::string signedNames = "llvm.argtypeconst.annotations."; + std::string argName = ptr->getName(); + if (!mMF) + return false; + signedNames += mMF->getFunction()->getName(); + const GlobalVariable *GV = + mMF->getFunction()->getParent()->getGlobalVariable(signedNames); + if (!GV || !GV->hasInitializer()) + return false; + const ConstantArray *CA = dyn_cast(GV->getInitializer()); + if (!CA) + return false; + for (uint32_t start = 0, stop = CA->getNumOperands(); start < stop; ++start) { + const ConstantExpr *nameField = + dyn_cast(CA->getOperand(start)); + if (!nameField) + continue; + + const GlobalVariable *nameGV = + dyn_cast(nameField->getOperand(0)); + if (!nameGV || !nameGV->hasInitializer()) + continue; + + const ConstantDataSequential *nameArray = + dyn_cast(nameGV->getInitializer()); + if (!nameArray) + continue; + + std::string nameStr = nameArray->getAsString(); + // We don't want to include the newline + if (!nameStr.compare(0, nameStr.length() - 1, argName)) + return true; + } + return false; +} Index: lib/Target/HSAIL/HSAILMetadata.hpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILMetadata.hpp @@ -0,0 +1,199 @@ +// +// taken from hsa/compiler/lib/include/aclTypes.h +// + +#ifndef _HSAIL_METADATA_HPP +#define _HSAIL_METADATA_HPP + +typedef struct _md_arg_type_0_7 argType; +typedef struct _md_printf_fmt_0_7 printfFmt; + +// Enumerations for the various argument types. +typedef enum argTypeEnum { + ARG_TYPE_ERROR = 0, + ARG_TYPE_SAMPLER = 1, + ARG_TYPE_IMAGE = 2, + ARG_TYPE_COUNTER = 3, + ARG_TYPE_VALUE = 4, + ARG_TYPE_POINTER = 5, + ARG_TYPE_SEMAPHORE = 6, + ARG_TYPE_QUEUE = 7, // enum for device enqueue + ARG_TYPE_LAST = 8 +} ArgType; + +// Enumerations of the valid data types for pass by value and +// pass by pointer kernel arguments. 
+typedef enum dataTypeEnum { + DATATYPE_ERROR = 0, + DATATYPE_i1 = 1, + DATATYPE_i8 = 2, + DATATYPE_i16 = 3, + DATATYPE_i32 = 4, + DATATYPE_i64 = 5, + DATATYPE_u8 = 6, + DATATYPE_u16 = 7, + DATATYPE_u32 = 8, + DATATYPE_u64 = 9, + DATATYPE_f16 = 10, + DATATYPE_f32 = 11, + DATATYPE_f64 = 12, + DATATYPE_f80 = 13, + DATATYPE_f128 = 14, + DATATYPE_struct = 15, + DATATYPE_union = 16, + DATATYPE_event = 17, + DATATYPE_opaque = 18, + DATATYPE_unknown = 19, + DATATYPE_LAST = 20 +} ArgDataType; + +// Enumerations of the valid memory types for pass by pointer +// kernel arguments +typedef enum memoryTypeEnum { + PTR_MT_ERROR = 0, // Error + PTR_MT_GLOBAL = 1, // global buffer + PTR_MT_SCRATCH_EMU = 2, // SW emulated private memory + PTR_MT_LDS_EMU = 3, // SW emulated local memory + PTR_MT_UAV = 4, // uniformed access vector memory + PTR_MT_CONSTANT_EMU = 5, // SW emulated constant memory + PTR_MT_GDS_EMU = 6, // SW emulated region memory + PTR_MT_LDS = 7, // HW local memory + PTR_MT_SCRATCH = 8, // HW private memory + PTR_MT_CONSTANT = 9, // HW constant memory + PTR_MT_GDS = 10, // HW region memory + PTR_MT_UAV_SCRATCH = 11, // SI and later HW private memory + PTR_MT_UAV_CONSTANT = 12, // SI and later HW constant memory + PTR_MT_LAST = 13 +} MemoryType; + +// Enumeration that specifies the various access types for a pointer/image. +typedef enum imageTypeEnum { + ACCESS_TYPE_ERROR = 0, + ACCESS_TYPE_RO = 1, + ACCESS_TYPE_WO = 2, + ACCESS_TYPE_RW = 3, + ACCESS_TYPE_LAST = 4 +} AccessType; + +//! An enumeration that maps memory types to index values +//! An enumeration that maps Resource type to index values +typedef enum _rt_gpu_resource_type_rec { + RT_RES_UAV = 0, // UAV resources + RT_RES_PRI = 1, // Private resources + RT_RES_LDS = 2, // LDS resources + RT_RES_GDS = 3, // GDS resources + RT_RES_CON = 4, // Constant resources + RT_RES_LAST = 5 +} aclGPUResource; + +typedef enum _rt_gpu_mem_sizes { + RT_MEM_HW_LOCAL = 0, + RT_MEM_SW_LOCAL = 1, + RT_MEM_HW_PRIVATE = 2, + RT_MEM_SW_PRIVATE = 3, + RT_MEM_HW_REGION = 4, + RT_MEM_SW_REGION = 5, + RT_MEM_LAST = 6 +} aclGPUMemSizes; + +typedef struct _md_arg_type_0_7 { + size_t struct_size; + size_t argNameSize; + size_t typeStrSize; + const char *argStr; + const char *typeStr; + union { + struct { // Struct for sampler arguments + unsigned ID; + unsigned isKernelDefined; + unsigned value; + } sampler; + struct { // Struct for image arguments + unsigned resID; + unsigned cbNum; + unsigned cbOffset; + AccessType type; + bool is2D; + bool is1D; + bool isArray; + bool isBuffer; + } image; + struct { // struct for atomic counter arguments + unsigned is32bit; + unsigned resID; + unsigned cbNum; + unsigned cbOffset; + } counter; + struct { // struct for semaphore arguments + unsigned resID; + unsigned cbNum; + unsigned cbOffset; + } sema; + struct { // struct for pass by value arguments + unsigned numElements; + unsigned cbNum; + unsigned cbOffset; + ArgDataType data; + } value; + struct { // struct for pass by pointer arguments + unsigned numElements; + unsigned cbNum; + unsigned cbOffset; + unsigned bufNum; + unsigned align; + ArgDataType data; + MemoryType memory; + AccessType type; + bool isVolatile; + bool isRestrict; + bool isPipe; + } pointer; + } arg; + ArgType type; + bool isConst; +} argType_0_7; + +//! 
A structure that holds information for printf +// The format in memory of this structure is +// ------------ +// | printfFmt| +// ------------ +// |->argSizes| +// ------------ +// |->fmrStr | +// ------------ + +typedef struct _md_printf_fmt_0_7 { + size_t struct_size; + unsigned ID; + size_t numSizes; + size_t fmtStrSize; + uint32_t *argSizes; + const char *fmtStr; +} printfFmt_0_7; + +//! A structure that holds the metadata in the RODATA section. +typedef struct _cl_metadata_0_7 { + size_t struct_size; // This holds the size of the structure itself for + // versioning. + size_t data_size; // This holds the size of all the memory allocated for this + // structure. + uint32_t major, minor, revision, gpuCaps, funcID; + uint32_t gpuRes[RT_RES_LAST]; + uint32_t wgs[3]; + uint32_t wrs[3]; + size_t kernelNameSize; + size_t deviceNameSize; + size_t mem[RT_MEM_LAST]; + size_t numArgs; + size_t numPrintf; + + argType *args; + printfFmt *printf; + const char *kernelName; + const char *deviceName; + bool enqueue_kernel; + uint32_t kernel_index; +} CLMetadata_0_7; + +#endif // _HSAIL_METADATA_HPP Index: lib/Target/HSAIL/HSAILModuleInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILModuleInfo.h @@ -0,0 +1,105 @@ +//==-- HSAILModuleInfo.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +/// \file +/// This is an MMI implementation for HSAIL targets. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILMODULEINFO_H +#define LLVM_LIB_TARGET_HSAIL_HSAILMODULEINFO_H + +#include "HSAIL.h" +#include "HSAILKernel.h" +#include "llvm/IR/Module.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace llvm { +class Argument; +class TypeSymbolTable; +class GlobalValue; +class MachineFunction; +class GlobalValue; + +class HSAILMachineFunctionInfo; +class HSAILModuleInfo : public MachineModuleInfoImpl { + + void parseEdgGlobalVariables(const Module *M, const HSAILTargetMachine *mTM); + +protected: + const MachineModuleInfo *mMMI; + +public: + HSAILModuleInfo(const MachineModuleInfo &); + virtual ~HSAILModuleInfo(); + + void processModule(const Module *MF, const HSAILTargetMachine *mTM); + + /// Process the given module and parse out the global variable metadata passed + /// down from the frontend-compiler + + /// Returns true if the image ID corresponds to a read only image. + bool isReadOnlyImage(StringRef Name, uint32_t iID) const; + + /// Returns true if the image ID corresponds to a write only image. + bool isWriteOnlyImage(StringRef Name, uint32_t iID) const; + + /// Returns true if the image ID corresponds to a read write image. + bool isReadWriteImage(StringRef name, uint32_t iID) const; + + /// Get a reference to the kernel metadata information for the given function + /// name. 
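+ /// Returns null if no kernel metadata has been recorded for \p Name.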
+ HSAILKernel *getKernel(StringRef Name); + + /// Query if the constant argument uses hardware or not + bool usesHWConstant(const HSAILKernel *krnl, StringRef Arg); + + /// Query the constant buffer number for a constant pointer. + uint32_t getConstPtrCB(const HSAILKernel *krnl, StringRef Arg); + + /// Get the unique function ID for the specific function name and create a new + /// unique ID if it is not found. + uint32_t getOrCreateFunctionID(const GlobalValue *func); + uint32_t getOrCreateFunctionID(const std::string &func); + + void add_printf_offset(uint32_t offset) { mPrintfOffset += offset; } + uint32_t get_printf_offset() { return mPrintfOffset; } + +public: + StringMap mKernels; + +private: + StringMap mKernelArgs; + StringMap mArrayMems; + StringMap mFuncNames; + DenseMap mFuncPtrNames; + DenseMap mImageNameMap; + StringMap> mSamplerSet; + std::set mByteStore; + std::set mIgnoreStr; + DenseMap mArgIDMap; + const char *symTab; + const HSAILSubtarget *mSTM; + size_t mOffset; + uint32_t mReservedBuffs; + uint32_t mCurrentCPOffset; + uint32_t mPrintfOffset; + bool mProcessed; +}; +} // end namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILModuleInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILModuleInfo.cpp @@ -0,0 +1,123 @@ +//===-- HSAILModuleInfo.cpp -----------------------------------------------===// + +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILModuleInfo.h" +#include "HSAILTargetMachine.h" + +using namespace llvm; + +HSAILModuleInfo::HSAILModuleInfo(const MachineModuleInfo &MMI) { + mMMI = &MMI; + mOffset = 0; + mReservedBuffs = 0; + symTab = nullptr; + mCurrentCPOffset = 0; + mPrintfOffset = 0; + mProcessed = false; +} + +HSAILModuleInfo::~HSAILModuleInfo() { + for (StringMap::iterator kb = mKernels.begin(), + ke = mKernels.end(); + kb != ke; ++kb) { + HSAILKernel *ptr = kb->getValue(); + delete ptr; + } +} + +static const HSAILConstPtr *getConstPtr(const HSAILKernel *krnl, + const std::string &arg) { + if (!krnl) { + return nullptr; + } + SmallVector::const_iterator begin, end; + for (begin = krnl->constPtr.begin(), end = krnl->constPtr.end(); begin != end; + ++begin) { + if (!strcmp(begin->name.data(), arg.c_str())) { + return &(*begin); + } + } + return nullptr; +} + +void HSAILModuleInfo::processModule(const Module *M, + const HSAILTargetMachine *mTM) { + mSTM = mTM->getSubtargetImpl(); + if (mProcessed) { + return; + } + + // Make sure we only process the module once even though this function + // is called everytime a MachineFunctionInfo object is instantiated. 
+ mProcessed = true; +} + +HSAILKernel *HSAILModuleInfo::getKernel(StringRef name) { + StringMap::iterator iter = mKernels.find(name); + if (iter == mKernels.end()) { + return nullptr; + } else { + return iter->second; + } +} + +bool HSAILModuleInfo::isWriteOnlyImage(StringRef name, uint32_t iID) const { + const StringMap::const_iterator kiter = mKernels.find(name); + if (kiter == mKernels.end()) { + return false; + } + return kiter->second->writeOnly.count(iID); +} + +bool HSAILModuleInfo::isReadOnlyImage(StringRef name, uint32_t iID) const { + const StringMap::const_iterator kiter = mKernels.find(name); + if (kiter == mKernels.end()) { + return false; + } + return kiter->second->readOnly.count(iID); +} + +bool HSAILModuleInfo::isReadWriteImage(StringRef name, uint32_t iID) const { + const StringMap::const_iterator kiter = mKernels.find(name); + if (kiter == mKernels.end()) { + return false; + } + return kiter->second->readWrite.count(iID); +} + +bool HSAILModuleInfo::usesHWConstant(const HSAILKernel *krnl, StringRef arg) { + const HSAILConstPtr *curConst = getConstPtr(krnl, arg); + if (!curConst) { + return false; + } + return curConst->usesHardware; +} + +uint32_t HSAILModuleInfo::getConstPtrCB(const HSAILKernel *krnl, + StringRef Arg) { + const HSAILConstPtr *curConst = getConstPtr(krnl, Arg); + if (!curConst) { + return 0; + } + return curConst->cbNum; +} + +uint32_t HSAILModuleInfo::getOrCreateFunctionID(const std::string &func) { + const unsigned int RESERVED_FUNCS = 1024; + + uint32_t id; + if (mFuncNames.find(func) == mFuncNames.end()) { + id = mFuncNames.size() + RESERVED_FUNCS + mFuncPtrNames.size(); + mFuncNames[func] = id; + } else { + id = mFuncNames[func]; + } + return id; +} Index: lib/Target/HSAIL/HSAILNodes.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILNodes.td @@ -0,0 +1,152 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +def HSAILDTIntTernaryOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> +]>; + +def HSAILDTBitExtractOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisSameAs<2, 3>, SDTCisInt<2> +]>; + +def HSAILLdExpOp : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] +>; + +def HSAILActiveLanePermuteOp : SDTypeProfile<1, 5, + [SDTCisInt<0>, SDTCisInt<1>, + SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisSameAs<0, 4>, SDTCisInt<5>] +>; + +def HSAILActiveLaneIdOp : SDTypeProfile<1, 1, + [SDTCisInt<0>, SDTCisInt<1>] +>; + +def HSAILActiveLaneCountOp : SDTypeProfile<1, 2, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>] +>; + +def HSAILActiveLaneMaskOp : SDTypeProfile<4, 2, + [SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, + SDTCisInt<4>, SDTCisInt<5>] +>; + +def HSAILFPClassOp : SDTypeProfile<1, 2, + [SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>] +>; + +def HSAILLdaOp : SDTypeProfile<1, 2, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>] +>; + +// i1 = segment, i1:nonull, ptr +def HSAILSegmentPOp : SDTypeProfile<1, 3, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisInt<3>] +>; + +def HSAILArgLdOp : SDTypeProfile<1, 4, + [SDTCisInt<1>, SDTCisInt<2>, SDTCisSameAs<2, 3>] +>; + +def HSAILArgStOp : SDTypeProfile<0, 4, + [SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>] +>; + +//===----------------------------------------------------------------------===// +// Call/Return DAG Nodes +//===----------------------------------------------------------------------===// +def IL_callseq_start : SDNode<"ISD::CALLSEQ_START", SDTIL_CallSeqStart, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; + +def IL_callseq_end : SDNode<"ISD::CALLSEQ_END", SDTIL_CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPSideEffect]>; + +def HSAILret : SDNode<"HSAILISD::RET", SDTNone, + [SDNPHasChain, SDNPOptInGlue] +>; + +def HSAILArgLd : SDNode<"HSAILISD::ARG_LD", HSAILArgLdOp, + [SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue] +>; + +def HSAILArgSt : SDNode<"HSAILISD::ARG_ST", HSAILArgStOp, + [SDNPHasChain, SDNPMayStore, SDNPOutGlue, SDNPInGlue] +>; + +def HSAILlda : SDNode<"HSAILISD::LDA", HSAILLdaOp>; + +// out = a - floor(a) +def HSAILfract : SDNode<"HSAILISD::FRACT", SDTFPUnaryOp>; + +def HSAILnfma : SDNode<"HSAILISD::NFMA", SDTFPTernaryOp>; +def HSAILumad : SDNode<"HSAILISD::UMAD", HSAILDTIntTernaryOp>; +def HSAILsmad : SDNode<"HSAILISD::SMAD", HSAILDTIntTernaryOp>; +def HSAILbitselect : SDNode<"HSAILISD::BITSELECT", HSAILDTIntTernaryOp>; +def HSAILsbitextract : SDNode<"HSAILISD::SBITEXTRACT", HSAILDTBitExtractOp>; +def HSAILubitextract : SDNode<"HSAILISD::UBITEXTRACT", HSAILDTBitExtractOp>; + +// out = max(a, b) a and b are signed ints +def HSAILsmax : SDNode<"HSAILISD::SMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = max(a, b) a and b are unsigned ints +def HSAILumax : SDNode<"HSAILISD::UMAX", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are signed ints +def HSAILsmin : SDNode<"HSAILISD::SMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// out = min(a, b) a and b are unsigned ints +def HSAILumin : SDNode<"HSAILISD::UMIN", SDTIntBinOp, + [SDNPCommutative, SDNPAssociative] +>; + +// Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when +// performing the mulitply. The result is a 32-bit value. 
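+// Only the low 24 bits of each operand participate; the fused multiply-add
+// forms are HSAILumad24/HSAILsmad24 below.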
+def HSAILumul24 : SDNode<"HSAILISD::UMUL24", SDTIntBinOp, + [SDNPCommutative] +>; +def HSAILsmul24 : SDNode<"HSAILISD::SMUL24", SDTIntBinOp, + [SDNPCommutative] +>; + +def HSAILumad24 : SDNode<"HSAILISD::UMAD24", HSAILDTIntTernaryOp, + [] +>; +def HSAILsmad24 : SDNode<"HSAILISD::SMAD24", HSAILDTIntTernaryOp, + [] +>; + +def HSAILfldexp : SDNode<"HSAILISD::FLDEXP", HSAILLdExpOp>; + +def HSAILactivelanepermute : SDNode<"HSAILISD::ACTIVELANEPERMUTE", + HSAILActiveLanePermuteOp, [SDNPHasChain, SDNPSideEffect] +>; + +def HSAILactivelaneid : SDNode<"HSAILISD::ACTIVELANEID", + HSAILActiveLaneIdOp, [SDNPHasChain, SDNPSideEffect] +>; + +def HSAILactivelanecount : SDNode<"HSAILISD::ACTIVELANECOUNT", + HSAILActiveLaneCountOp, [SDNPHasChain, SDNPSideEffect] +>; + +def HSAILactivelanemask : SDNode<"HSAILISD::ACTIVELANEMASK", + HSAILActiveLaneMaskOp, [SDNPHasChain, SDNPSideEffect] +>; + +def HSAILclass : SDNode<"HSAILISD::CLASS", HSAILFPClassOp>; + +def HSAILkernargbaseptr : SDNode<"HSAILISD::KERNARGBASEPTR", SDTIntLeaf>; + +def HSAILsegmentp : SDNode<"HSAILISD::SEGMENTP", HSAILSegmentPOp>; Index: lib/Target/HSAIL/HSAILOpaqueTypes.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILOpaqueTypes.h @@ -0,0 +1,68 @@ +//===-- HSAILOpaqueTypes.h - SPIR opaque types ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// \brief This file declares the API for working with SPIR opaque +/// types. This includes images and samplers among other things. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILOPAQUETYPES_H +#define LLVM_LIB_TARGET_HSAIL_HSAILOPAQUETYPES_H + +namespace llvm { + +class Type; + +enum OpaqueType { + NotOpaque, + I1D, + I1DA, + I1DB, + I2D, + I2DA, + I3D, + I2DDepth, + I2DADepth, + C32, + C64, + Sema, + Sampler, + Event, + ReserveId, + CLKEventT, + QueueT, + UnknownOpaque +}; + +OpaqueType GetOpaqueType(const Type *T); + +inline bool IsImage(OpaqueType OT) { + switch (OT) { + default: + return false; + case I1D: + case I1DA: + case I1DB: + case I2D: + case I2DA: + case I3D: + case I2DDepth: + case I2DADepth: + return true; + } +} + +inline bool IsImage(const Type *T) { return IsImage(GetOpaqueType(T)); } + +inline bool IsSampler(const Type *T) { return GetOpaqueType(T) == Sampler; } +} // end namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILOpaqueTypes.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILOpaqueTypes.cpp @@ -0,0 +1,69 @@ +//===-- HSAILOpaqueTypes.cpp - SPIR opaque types --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// +/// \brief This file implements the API for working with SPIR opaque +/// types. This includes images and samplers among other things. +// +//===----------------------------------------------------------------------===// + +#include "HSAILOpaqueTypes.h" + +#include +#include + +using namespace llvm; + +/// \brief Check for an opaque type. 
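+/// For example, a pointer to the opaque struct type %opencl.image2d_t maps
+/// to I2D, %struct._sampler_t maps to Sampler, and a non-opaque type such as
+/// i32 yields NotOpaque.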
+OpaqueType llvm::GetOpaqueType(const Type *T) {
+  // Handle the degenerate case first.
+  if (!T)
+    return NotOpaque;
+
+  const StructType *ST = dyn_cast<StructType>(T);
+
+  // If the type is not a struct, check if it is a pointer and try to
+  // extract a struct from there.
+  if (!ST) {
+    const PointerType *PT = dyn_cast<PointerType>(T);
+
+    // Not a struct, not a pointer. It can't be opaque.
+    if (!PT)
+      return NotOpaque;
+
+    const Type *CT = PT->getElementType();
+    ST = dyn_cast<StructType>(CT);
+  }
+
+  if (!ST || !ST->isOpaque())
+    return NotOpaque;
+
+  return StringSwitch<OpaqueType>(ST->getName())
+    .Cases("opencl.image1d_t", "struct._image1d_t", I1D)
+    .Cases("opencl.image1d_array_t", "struct._image1d_array_t", I1DA)
+    .Cases("opencl.image1d_buffer_t", "struct._image1d_buffer_t", I1DB)
+    .Cases("opencl.image2d_t", "struct._image2d_t", I2D)
+    .Cases("opencl.image2d_array_t", "struct._image2d_array_t", I2DA)
+    .Cases("opencl.image3d_t", "struct._image3d_t", I3D)
+    .Cases("opencl.image2d_depth_t", "struct._image2d_depth_t", I2DDepth)
+    .Cases("opencl.image2d_array_depth_t", "struct._image2d_array_depth_t",
+           I2DADepth)
+    // There is no opaque sampler type in SPIR. The i32 in SPIR is
+    // lowered to the EDG-style opaque sampler type.
+    .Case("struct._sampler_t", Sampler)
+    .Cases("opencl.event_t", "struct._event_t", Event)
+    .Case("struct._counter32_t", C32)
+    .Case("struct._counter64_t", C64)
+    .Case("struct._sema_t", Sema)
+    .Case("opencl.reserve_id_t", ReserveId)
+    .Case("opencl.clk_event_t", CLKEventT)
+    .Case("opencl.queue_t", QueueT)
+    .Default(UnknownOpaque);
+}
Index: lib/Target/HSAIL/HSAILOperands.td
===================================================================
--- /dev/null
+++ lib/Target/HSAIL/HSAILOperands.td
@@ -0,0 +1,86 @@
+//===------------------------------------------------------*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// Custom memory operands
+//===----------------------------------------------------------------------===//
+def PtrRC: Operand<iPTR>, PointerLikeRegClass<0>;
+def PtrRC32: Operand<i32>, PointerLikeRegClass<32>;
+
+// Memory operand: base, register and offset.
+//
+// FIXME: iPTR is fundamentally broken for address spaces, should use
+// something else.
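+// The three sub-operands model an HSAIL address expression of the form
+// [symbol][$reg + offset]; printAddrMode3Op is expected to fold them back
+// into that combined syntax, e.g. roughly [&buf][$s1+16] (illustrative only,
+// not taken from this patch).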
+def MEMOP : Operand { + let MIOperandInfo = (ops iAny:$base, iAny:$reg, iAny:$offset); + let PrintMethod = "printAddrMode3Op"; +} + +def Vec2DestOp32 : Operand { + let MIOperandInfo = (ops HSAILDest32Operand, HSAILDest32Operand); + let PrintMethod = "printVec2Op"; +} + +def Vec3DestOp32 : Operand { + let MIOperandInfo = (ops HSAILDest32Operand, HSAILDest32Operand, HSAILDest32Operand); + let PrintMethod = "printVec3Op"; +} + +def Vec4DestOp32 : Operand { + let MIOperandInfo = (ops HSAILDest32Operand, HSAILDest32Operand, HSAILDest32Operand, HSAILDest32Operand); + let PrintMethod = "printVec4Op"; +} + + +def Vec2DestOp64 : Operand { + let MIOperandInfo = (ops HSAILDest64Operand, HSAILDest64Operand); + let PrintMethod = "printVec2Op"; +} + +def Vec3DestOp64 : Operand { + let MIOperandInfo = (ops HSAILDest64Operand, HSAILDest64Operand, HSAILDest64Operand); + let PrintMethod = "printVec3Op"; +} + +def Vec4DestOp64 : Operand { + let MIOperandInfo = (ops HSAILDest64Operand, HSAILDest64Operand, HSAILDest64Operand, HSAILDest64Operand); + let PrintMethod = "printVec4Op"; +} + + +def calltarget : Operand; + +def GPROrImm : ComplexPattern; + +def ftz : Operand { + let PrintMethod = "printFTZ"; +} + +def nonull : Operand { + let PrintMethod = "printNoNull"; +} + +def equiv : Operand { + let PrintMethod = "printEquiv"; +} + +def v4mod : Operand { + let PrintMethod = "printV4"; +} + +def ArgDeclAlignment : Operand { + let OperandType = "OPERAND_IMMEDIATE"; + let PrintMethod = "printArgDeclAlignment"; +} + +def ArraySize : Operand { + let OperandType = "OPERAND_IMMEDIATE"; + let PrintMethod = "printArraySize"; +} Index: lib/Target/HSAIL/HSAILParamManager.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILParamManager.h @@ -0,0 +1,150 @@ +//===- HSAILParamManager.h - kernel/function arguments -----------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \ file +/// This file defines the HSAILParamManager class, which manages all defined +/// .param variables for a particular function. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILPARAMMANAGER_H +#define LLVM_LIB_TARGET_HSAIL_HSAILPARAMMANAGER_H + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Type.h" + +namespace llvm { + +/// HSAILParamManager - This class manages all parameter variables defined for a +/// particular function. 
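+/// Parameters come in five kinds (kernarg, function argument, return value,
+/// call actual argument and call return value); each kind is tracked in its
+/// own ordered list, see HSAILParamType and the vectors below.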
+class HSAILParamManager { +private: + /// HSAILParamType - Type of a kernarg/arg/call param variable + enum HSAILParamType { + HSAIL_PARAM_TYPE_KERNARG, + HSAIL_PARAM_TYPE_ARGUMENT, + HSAIL_PARAM_TYPE_RETURN, + HSAIL_PARAM_TYPE_CALL_PARAM, + HSAIL_PARAM_TYPE_CALL_RET + }; + + /// HSAILParam - Definition of a HSAIL kernarg/arg variable + struct HSAILParam { + HSAILParamType Type; + unsigned Offset; // Parameter offset in its segment + const Argument *Arg; // Original function argument if any + }; + + DenseMap AllParams; + DenseMap ParamNames; + DenseMap ParamTypes; + SmallVector ArgumentParams; + SmallVector ReturnParams; + SmallVector CallArgParams; + SmallVector CallRetParams; + + unsigned addParam(HSAILParamType ParamType, Type *Ty, + const StringRef ParamName); + + const DataLayout *DL; + +public: + typedef DenseMap::const_iterator names_iterator; + typedef SmallVector::const_iterator param_iterator; + + HSAILParamManager(const DataLayout *_DL) : DL(_DL){}; + ~HSAILParamManager(); + + param_iterator arg_begin() const { return ArgumentParams.begin(); } + param_iterator arg_end() const { return ArgumentParams.end(); } + param_iterator ret_begin() const { return ReturnParams.begin(); } + param_iterator ret_end() const { return ReturnParams.end(); } + param_iterator call_arg_begin() const { return CallArgParams.begin(); } + param_iterator call_arg_end() const { return CallArgParams.end(); } + param_iterator call_ret_begin() const { return CallRetParams.begin(); } + param_iterator call_ret_end() const { return CallRetParams.end(); } + + /// addArgumentParam - Returns a new variable used as an argument. + /// AS is an address space of the argument. + unsigned addArgumentParam(unsigned AS, const Argument &Arg, + const StringRef ParamName); + + /// addReturnParam - Returns a new variable used as a return argument. + unsigned addReturnParam(Type *Ty, const StringRef ParamName); + + /// addCallArgParam - Returns a new variable used as a call actual argument. + unsigned addCallArgParam(Type *Ty, const StringRef ParamName); + + /// addCallRetParam - Returns a new variable used as a call actual return + /// argument. + unsigned addCallRetParam(Type *Ty, const StringRef ParamName); + + /// addParamName - Saves a persistent copy of Param Name + void addParamName(std::string Name, unsigned Index); + + /// addParamType - Saves the type of the parameter + void addParamType(Type *pTy, unsigned Index); + + /// getParamName - Returns the name of the parameter as a string. + const char *getParamName(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return ParamNames.find(Param)->second; + } + + /// getParamType - Returns the type of the parameter + Type *getParamType(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return ParamTypes.find(Param)->second; + } + + /// getParamSize - Returns the size of the parameter in bits. + unsigned getParamSize(unsigned Param) const { + return DL->getTypeStoreSize(getParamType(Param)); + } + + /// getParamOffset - Returns an offset of the parameter in its segment if + /// available, or UINT_MAX if unknown. + unsigned getParamOffset(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Offset; + } + + /// getParamOffset - Returns an offset of the parameter in its segment if + /// available, or UINT_MAX if unknown. 
+ const Argument *getParamArg(unsigned Param) const { + assert(AllParams.count(Param) == 1 && "Param has not been defined!"); + return AllParams.find(Param)->second.Arg; + } + + /// Return parameter by its offset. + /// Offset is updated to refer to the parameter base address. + /// If parameter is not found returns UINT_MAX. + unsigned getParamByOffset(unsigned &Offset) const; + + unsigned getParamByOffset(int64_t &Offset) const { + if (Offset >= UINT_MAX || Offset < 0) + return UINT_MAX; + unsigned o = (unsigned)Offset; + unsigned r = getParamByOffset(o); + Offset = o; + return r; + } + + /// returns a unique argument name. + static std::string mangleArg(Mangler *Mang, + StringRef ArgName, + const DataLayout &DL); +}; +} + +#endif Index: lib/Target/HSAIL/HSAILParamManager.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILParamManager.cpp @@ -0,0 +1,168 @@ +//=== HSAILParamManager.cpp - kernel/function arguments ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HSAILParamManager class. +// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "HSAILParamManager.h" +#include "HSAILOpaqueTypes.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Mangler.h" +#include + +using namespace llvm; + +HSAILParamManager::~HSAILParamManager() { + // Special handling for teardown of ParamNames + for (names_iterator I = ParamNames.begin(), E = ParamNames.end(); I != E; + ++I) { + // Delete malloc'ed name strings + free(I->second); + } + ParamNames.clear(); +} + +unsigned HSAILParamManager::addParam(HSAILParamType ParamType, Type *Ty, + const StringRef ParamName) { + HSAILParam Param; + Param.Type = ParamType; + Param.Arg = nullptr; + SmallVector *ParamList = 0; + const char *DefName = 0; + + std::string Name; + + switch (ParamType) { + case HSAIL_PARAM_TYPE_KERNARG: + case HSAIL_PARAM_TYPE_ARGUMENT: + ParamList = &ArgumentParams; + DefName = "__arg_p"; + break; + case HSAIL_PARAM_TYPE_RETURN: + ParamList = &ReturnParams; + DefName = "__ret_"; + break; + case HSAIL_PARAM_TYPE_CALL_PARAM: + ParamList = &CallArgParams; + DefName = "__param_"; + break; + case HSAIL_PARAM_TYPE_CALL_RET: + ParamList = &CallRetParams; + DefName = "__ret_"; + break; + } + + if (ParamName.empty()) { + Name = DefName; + Name += utostr(ParamList->size()); + } else { + Name = ParamName; + } + + unsigned prev_offset = 0; + unsigned prev_size = 0; + if (ParamList->size() > 0) { + unsigned prev_param = (*ParamList)[ParamList->size() - 1]; + prev_offset = getParamOffset(prev_param); + prev_size = getParamSize(prev_param); + } + if (prev_offset == UINT_MAX || GetOpaqueType(Ty)) { + Param.Offset = UINT_MAX; + } else { + unsigned alignment = DL->getABITypeAlignment(Ty); + // W/a for RT alignment of vectors to element size: + if (ParamType == HSAIL_PARAM_TYPE_KERNARG && Ty->isVectorTy()) + alignment = DL->getABITypeAlignment(Ty->getVectorElementType()); + assert(alignment != 0); + Param.Offset = (prev_offset + prev_size + alignment - 1) & ~(alignment - 1); + } + + unsigned Index = AllParams.size(); + AllParams[Index] = Param; + ParamList->push_back(Index); + + 
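+  // Illustrative example (assuming natural ABI alignment): packing kernargs
+  // of types (i32, i8, i64) with the expression above yields offsets 0, 4
+  // and 8; the i64 slot is rounded up from byte 5 to the next 8-byte
+  // boundary.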
addParamName(Name, Index); + addParamType(Ty, Index); + + return Index; +} + +unsigned HSAILParamManager::addArgumentParam(unsigned AS, const Argument &Arg, + const StringRef ParamName) { + unsigned Param = + addParam((AS == HSAILAS::ARG_ADDRESS) ? HSAIL_PARAM_TYPE_ARGUMENT + : HSAIL_PARAM_TYPE_KERNARG, + Arg.getType(), ParamName); + AllParams.find(Param)->second.Arg = &Arg; + return Param; +} + +unsigned HSAILParamManager::addReturnParam(Type *Ty, + const StringRef ParamName) { + return addParam(HSAIL_PARAM_TYPE_RETURN, Ty, ParamName); +} + +unsigned HSAILParamManager::addCallArgParam(Type *Ty, + const StringRef ParamName) { + return addParam(HSAIL_PARAM_TYPE_CALL_PARAM, Ty, ParamName); +} + +unsigned HSAILParamManager::addCallRetParam(Type *Ty, + const StringRef ParamName) { + return addParam(HSAIL_PARAM_TYPE_CALL_RET, Ty, ParamName); +} + +void HSAILParamManager::addParamName(std::string Name, unsigned Index) { + // malloc arg name string so that it persists through compilation + char *name = (char *)malloc(Name.length() + 1); + strcpy(name, Name.c_str()); + ParamNames[Index] = name; +} + +void HSAILParamManager::addParamType(Type *pTy, unsigned Index) { + ParamTypes[Index] = pTy; +} + +unsigned HSAILParamManager::getParamByOffset(unsigned &Offset) const { + unsigned param_no = ArgumentParams.size(); + for (unsigned i = 0; i < param_no; i++) { + unsigned param = ArgumentParams[i]; + unsigned o = getParamOffset(param); + if (o == UINT_MAX) + break; + if ((o <= Offset) && ((o + getParamSize(param)) > Offset)) { + // Parameter found and addressing is in bound. + Offset -= o; + return param; + } + } + return UINT_MAX; +} + +/// returns a unique argument name for flattened vector component. +std::string HSAILParamManager::mangleArg(Mangler *Mang, + StringRef ArgName, + const DataLayout &DL) { + if (ArgName.empty()) + return ""; + + std::string NameStrStorage; + + { + raw_string_ostream NameStr(NameStrStorage); + Mang->getNameWithPrefix(NameStr, ArgName, DL); + } + + return std::move(NameStrStorage); +} Index: lib/Target/HSAIL/HSAILPatterns.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILPatterns.td @@ -0,0 +1,24 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Store pattern fragments. +// +//===----------------------------------------------------------------------===// + + +def ADDR : ComplexPattern; + +// FIXME: This should really be a complex pattern on the root load, +// but there seem to be problems trying to use a ComplexPattern with a +// chain in a Pat. 
+def LoadAddr : ComplexPattern; +def StoreAddr : ComplexPattern; +def AtomicAddr : ComplexPattern; + +def SetCCPat : ComplexPattern; Index: lib/Target/HSAIL/HSAILProfiles.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILProfiles.td @@ -0,0 +1,14 @@ +//===---------------- HSAILILProfiles.td - HSAIL Profiles -----------------===// +// These are used for custom selection dag type profiles +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Call Sequence Profiles +//===----------------------------------------------------------------------===// +def SDTIL_CallSeqStart : SDCallSeqStart<[ + SDTCisVT<0, i32> + ]>; +def SDTIL_CallSeqEnd : SDCallSeqEnd<[ + SDTCisVT<0, i32>, SDTCisVT<1, i32> + ]>; + Index: lib/Target/HSAIL/HSAILRegisterInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILRegisterInfo.h @@ -0,0 +1,76 @@ +//=- HSAILRegisterInfo.h - HSAIL Register Information Impl --------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the HSAIL implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILREGISTERINFO_H +#define LLVM_LIB_TARGET_HSAIL_HSAILREGISTERINFO_H + +#include "llvm/Target/TargetRegisterInfo.h" + +#define GET_REGINFO_HEADER +#include "HSAILGenRegisterInfo.inc" + +namespace llvm { +class Type; +class TargetInstrInfo; +class HSAILSubtarget; + +/// DWARFFlavour - Flavour of dwarf regnumbers +/// +namespace DWARFFlavour { +enum { HSAIL_Generic = 0 }; +} + +class HSAILRegisterInfo : public HSAILGenRegisterInfo { +private: + HSAILSubtarget &ST; + + void lowerSpillB1(MachineBasicBlock::iterator II, int FrameIndex) const; + void lowerRestoreB1(MachineBasicBlock::iterator II, int FrameIndex) const; + +public: + HSAILRegisterInfo(HSAILSubtarget &st); + + const uint16_t * + getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + + BitVector getRegsAvailable(const TargetRegisterClass *RC) const; + + BitVector getReservedRegs(const MachineFunction &MF) const override; + + bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; + + bool requiresRegisterScavenging(const MachineFunction &MF) const override; + + bool requiresFrameIndexScavenging(const MachineFunction &MF) const override; + + bool saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const override; + + void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, + unsigned FIOperandNum, + RegScavenger *RS = nullptr) const override; + + unsigned getFrameRegister(const MachineFunction &MF) const override; + + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const override; + + const TargetRegisterClass *getPhysRegClass(unsigned Reg) const; +}; + +} // End llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILRegisterInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILRegisterInfo.cpp @@ -0,0 +1,334 @@ 
+//===- HSAILRegisterInfo.cpp - HSAIL Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the HSAIL implementation of the TargetRegisterInfo class. +// This file is responsible for the frame pointer elimination optimization +// on HSAIL. +// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILRegisterInfo.h" +#include "HSAILMachineFunctionInfo.h" +#include "HSAILSubtarget.h" +#include "HSAILTargetMachine.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +//#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "hsail-reginfo" + +using namespace llvm; + +static cl::opt HSAILReg32PressureLimit( + "hsail-reg32-pressure-limit", cl::Hidden, cl::init(24), + cl::desc("Register pressure limit for 32 bit HSAIL registers")); + +static cl::opt HSAILReg64PressureLimit( + "hsail-reg64-pressure-limit", cl::Hidden, cl::init(18), + cl::desc("Register pressure limit for 64 bit HSAIL registers")); + +static cl::opt HSAILRegSlots( + "hsail-reg-slots", cl::Hidden, cl::init(0), + cl::desc("A number of 64-bit slots allocated for $s registers")); + +HSAILRegisterInfo::HSAILRegisterInfo(HSAILSubtarget &st) + : HSAILGenRegisterInfo(0, 0), ST(st) {} + +const uint16_t * +HSAILRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const uint16_t CalleeSavedRegs[] = {0}; + return CalleeSavedRegs; +} + +BitVector +HSAILRegisterInfo::getRegsAvailable(const TargetRegisterClass *RC) const { + BitVector Mask(getNumRegs()); + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; + ++I) + Mask.set(*I); + return Mask; +} + +BitVector HSAILRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + + // We can have up to 128 s-registers, but we should have (s + 2*d + 4*q) <= + // 128. + // Let's calulate the number of 32 and 64 bit VRs used in the function + // and partition register file accordingly. + HSAILMachineFunctionInfo *MFI = const_cast( + MF.getInfo()); + unsigned NumSlotsTotal = HSAIL::GPR64RegClass.getNumRegs(); + // Default register file partitioning 64 s-regs + 32 d-regs, RegSlots = 32. 
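+  // Worked example (illustrative): with NumSlotsTotal = 64, a function using
+  // 100 32-bit and 10 64-bit virtual registers (and HSAILRegSlots unset)
+  // takes the first rebalancing branch below, giving RegSlots = 64 - 10 = 54,
+  // i.e. 108 $s plus 10 $d registers; the clamp to [8, 56] leaves that value
+  // unchanged.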
+ unsigned RegSlots = NumSlotsTotal / 2; + + // First query for this function, calculate register use + if (MFI->getRegisterPartitioning() == 0) { + const MachineRegisterInfo &RI = MF.getRegInfo(); + unsigned rc32 = 0, rc64 = 0; + for (unsigned i = 0, e = RI.getNumVirtRegs(); i != e; ++i) { + switch (RI.getRegClass(index2VirtReg(i))->getSize()) { + case 4: + rc32++; + break; + case 8: + rc64++; + break; + } + } + + if (HSAILRegSlots > 0) { + RegSlots = HSAILRegSlots; + } else { + // Calculate register file partitioning. We have 64 allocatable slots + // which + // are either 1 d-register or a pair of s-registers. 8 slots are reserved + // for 16 s-registers $s0..$s15, 8 are for 8 d-registers $d0..$d7. + // Default partitioning is 64 s-registers + 32 d-registers, which is + // RegSlots = 32 + + // If we have a small amount of 64 bit VRs, but high 32 bit register + // pressure reallocate slots to decrease 64 bit registers + if (rc64 < (NumSlotsTotal - RegSlots) && rc32 > (RegSlots * 2)) { + RegSlots = NumSlotsTotal - rc64; + } + // The opposite situation, we have a small demand on 32 bit registers but + // high pressure for 64 bit + else if (rc32 < (RegSlots * 2) && rc64 > (NumSlotsTotal - RegSlots)) { + RegSlots = (rc32 + 1) / 2; + } + } + + // Always preserve room for at least 16 s-registers and 8 d-registers + if (RegSlots < 8) + RegSlots = 8; + else if (RegSlots > (NumSlotsTotal - 8)) + RegSlots = NumSlotsTotal - 8; + + MFI->setRegisterPartitioning(RegSlots); + DEBUG(dbgs() << "\nFunction: " << MF.getFunction()->getName() + << " VR count: 32 bit = " << rc32 << ", 64 bit = " << rc64 + << ", register file partitioning: " << RegSlots * 2 << " $s + " + << NumSlotsTotal - RegSlots << " $d\n\n"); + } else { + RegSlots = MFI->getRegisterPartitioning(); + } + + unsigned Reg; + unsigned LastSReg = HSAIL::S0 + HSAIL::GPR32RegClass.getNumRegs() - 1; + for (Reg = HSAIL::S0 + RegSlots * 2; Reg <= LastSReg; ++Reg) { + Reserved.set(Reg); + } + unsigned LastDReg = HSAIL::D0 + HSAIL::GPR64RegClass.getNumRegs() - 1; + for (Reg = HSAIL::D0 + (NumSlotsTotal - RegSlots); Reg <= LastDReg; ++Reg) { + Reserved.set(Reg); + } + + return Reserved; +} + +bool HSAILRegisterInfo::trackLivenessAfterRegAlloc( + const MachineFunction &MF) const { + // TODO: Only enable when post-RA scheduling is enabled and this is needed. + return true; +} + +bool HSAILRegisterInfo::requiresRegisterScavenging( + const MachineFunction &MF) const { + return true; +} + +bool HSAILRegisterInfo::requiresFrameIndexScavenging( + const MachineFunction &MF) const { + // If we have spilled condition registers, we create virtual registers when + // replacing the pseudos. 
+ const HSAILMachineFunctionInfo *Info = MF.getInfo(); + return Info->hasSpilledCRs(); +} + +void HSAILRegisterInfo::lowerSpillB1(MachineBasicBlock::iterator II, + int FrameIndex) const { + MachineBasicBlock *MBB = II->getParent(); + MachineFunction *MF = MBB->getParent(); + MachineInstr &MI = *II; + const HSAILInstrInfo *TII = ST.getInstrInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + unsigned TempGPR32 = MRI.createVirtualRegister(&HSAIL::GPR32RegClass); + + DebugLoc DL = MI.getDebugLoc(); + BuildMI(*MBB, II, DL, TII->get(HSAIL::CVT_U32_B1), TempGPR32) + .addImm(0) // ftz + .addImm(0) // round + .addImm(BRIG_TYPE_U32) // destTypedestLength + .addImm(BRIG_TYPE_B1) // srcTypesrcLength + .addOperand(MI.getOperand(0)); + + MI.setDesc(TII->get(HSAIL::ST_U32)); + MI.getOperand(0).setReg(TempGPR32); + MI.getOperand(0).setIsKill(); + + MachineOperand *TypeOp = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength); + TypeOp->setImm(BRIG_TYPE_U32); +} + +void HSAILRegisterInfo::lowerRestoreB1(MachineBasicBlock::iterator II, + int FrameIndex) const { + MachineBasicBlock *MBB = II->getParent(); + MachineInstr &MI = *II; + DebugLoc DL = MI.getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + const HSAILInstrInfo *TII = ST.getInstrInfo(); + + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned TempGPR32 = MRI.createVirtualRegister(&HSAIL::GPR32RegClass); + + BuildMI(*MBB, ++II, DL, TII->get(HSAIL::CVT_B1_U32), DestReg) + .addImm(0) // ftz + .addImm(0) // round + .addImm(BRIG_TYPE_B1) // destTypedestLength + .addImm(BRIG_TYPE_U32) // srcTypesrcLength + .addReg(TempGPR32, RegState::Kill); + + MI.setDesc(TII->get(HSAIL::LD_U32)); + MI.getOperand(0).setReg(TempGPR32); + MI.getOperand(0).setIsDef(); + + MachineOperand *TypeOp = TII->getNamedOperand(MI, HSAIL::OpName::TypeLength); + TypeOp->setImm(BRIG_TYPE_U32); +} + +bool HSAILRegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { + MachineFunction *MF = MBB.getParent(); + HSAILMachineFunctionInfo *Info = MF->getInfo(); + MCContext &Ctx = MF->getContext(); + const HSAILInstrInfo *TII = ST.getInstrInfo(); + + // We only rely on the RegScavenger in rare cases for the temp registers + // needed when expanding spill_b1 / restore_b1. + assert(RC == &HSAIL::GPR32RegClass && + "Only expecting s register spills during emergencies"); + + DebugLoc DL = I->getDebugLoc(); + + // We don't really have a stack, and there's no real reason we can't create + // more stack objects. We will define a special spill variable for this case. 
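+  // The emitted HSAIL is roughly (illustrative sketch, exact modifiers
+  // elided):
+  //   st_spill_u32 $sN, [%___spillScavenge];
+  //   ...                                    // scavenged live range
+  //   ld_spill_u32 $sN, [%___spillScavenge];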
+ Info->setHasScavengerSpill(); + + MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef("%___spillScavenge")); + BuildMI(MBB, I, DL, TII->get(HSAIL::ST_U32)) + .addReg(Reg, RegState::Kill) // src + .addSym(Sym) // address_base + .addReg(HSAIL::NoRegister) // address_reg + .addImm(0) // address_offset + .addImm(BRIG_TYPE_U32) // TypeLength + .addImm(HSAILAS::SPILL_ADDRESS) // segment + .addImm(RC->getAlignment()); // align + + BuildMI(MBB, UseMI, DL, TII->get(HSAIL::LD_U32), Reg) + .addSym(Sym) // address_base + .addReg(HSAIL::NoRegister) // address_reg + .addImm(0) // address_offset + .addImm(BRIG_TYPE_U32) // TypeLength + .addImm(HSAILAS::SPILL_ADDRESS) // segment + .addImm(RC->getAlignment()) // align + .addImm(BRIG_WIDTH_1) // width + .addImm(0); // mask + + return true; +} + +void HSAILRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + int SPAdj, unsigned FIOperandNum, + RegScavenger *RS) const { + assert(SPAdj == 0 && "Unexpected"); + MachineInstr &MI = *II; + unsigned Opcode = MI.getOpcode(); + MachineFunction *MF = MI.getParent()->getParent(); + const MachineFrameInfo *MFI = MF->getFrameInfo(); + MCContext &Ctx = MF->getContext(); + + assert(HSAIL::getNamedOperandIdx(Opcode, HSAIL::OpName::address) == + static_cast(FIOperandNum) && + "Frame index should only be used for address operands"); + + MachineOperand &Base = MI.getOperand(FIOperandNum); + int FrameIndex = Base.getIndex(); + + if (Opcode == HSAIL::SPILL_B1) + lowerSpillB1(II, FrameIndex); + else if (Opcode == HSAIL::RESTORE_B1) + lowerRestoreB1(II, FrameIndex); + + StringRef SymName = MFI->isSpillSlotObjectIndex(FrameIndex) ? + "%__spillStack" : "%__privateStack"; + MCSymbol *Sym = Ctx.getOrCreateSymbol(SymName); + + Base.ChangeToMCSymbol(Sym); +} + +//===--------------------------------------------------------------------===// +/// Debug information queries. + +unsigned HSAILRegisterInfo::getFrameRegister(const MachineFunction &MF) const { + // This value is unused in LLVM + return HSAIL::NoRegister; +} + +unsigned HSAILRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + if (RC == &HSAIL::GPR32RegClass) { + return HSAILReg32PressureLimit; + } + if (RC == &HSAIL::GPR64RegClass) { + return HSAILReg64PressureLimit; + } + return 0; +} + +const TargetRegisterClass * +HSAILRegisterInfo::getPhysRegClass(unsigned Reg) const { + assert(!TargetRegisterInfo::isVirtualRegister(Reg)); + + static const TargetRegisterClass *BaseClasses[] = { + &HSAIL::GPR32RegClass, &HSAIL::GPR64RegClass, &HSAIL::CRRegClass}; + + for (const TargetRegisterClass *BaseClass : BaseClasses) { + if (BaseClass->contains(Reg)) + return BaseClass; + } + return nullptr; +} + +#define GET_REGINFO_TARGET_DESC +#include "HSAILGenRegisterInfo.inc" Index: lib/Target/HSAIL/HSAILRegisterInfo.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILRegisterInfo.td @@ -0,0 +1,56 @@ +//==- HSAILRegisterInfo.td - Main HSAIL Register Definition -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the HSAIL register file, defining the registers themselves, +// aliases between the registers, and the register classes built out of the +// registers. 
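+// HSAIL provides 128 32-bit $s registers, 64 64-bit $d registers and eight
+// 1-bit $c control registers; they are defined below and grouped into the
+// GPR32, GPR64 and CR register classes.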
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Register definitions... +// +class HSAILReg : Register { + let Namespace = "HSAIL"; + let DwarfNumbers = [d]; +} + + +//32-bit registers for signed, unsigned integers or float values +foreach N = 0-127 in { + def S#N : HSAILReg<"$s"#N, !add(0, N)>; +} + +//64-bit registers for signed, unsigned long integers or double float values +foreach N = 0-63 in { + def D#N : HSAILReg<"$d"#N, !add(128, N)>; +} + +//1-bit control registers +foreach N = 0-7 in { + def C#N : HSAILReg<"$c"#N, !add(192, N)>; +} + +//===----------------------------------------------------------------------===// +// HSAILReg Class Definitions... now that we have all of the pieces, define the +// top-level register classes. The order specified in the register list is +// implicitly defined to be the register allocation order. +// +def GPR32 : RegisterClass<"HSAIL", [i32, f32], 32, + (sequence "S%u", 0, 127)> { +} + +def GPR64 : RegisterClass<"HSAIL", [i64, f64], 64, + (sequence "D%u", 0, 63)> { +} + +def CR : RegisterClass<"HSAIL", [i1], 32, + (sequence "C%u", 0, 7)> { + let Size = 32; +} Index: lib/Target/HSAIL/HSAILSection.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSection.h @@ -0,0 +1,43 @@ +//===- HSAILSection.h - HSAIL-specific section representation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the HSAILSection class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILSECTION_H +#define LLVM_LIB_TARGET_HSAIL_HSAILSECTION_H + +#include "llvm/MC/MCSection.h" + +namespace llvm { + +// HSAIL does not have sections. Override this so we don't get unwanted .text +// labels emitted. +class HSAILSection : public MCSection { +public: + HSAILSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {} + virtual ~HSAILSection(); + + void PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS, + const MCExpr *Subsection) const override; + + bool UseCodeAlign() const override { + return false; + } + + bool isVirtualSection() const override { + return false; + } +}; + +} // end namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILSection.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSection.cpp @@ -0,0 +1,23 @@ +//===- HSAILSection.cpp -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILSection.h" + +using namespace llvm; + + +void HSAILSection::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS, + const MCExpr *Subsection) const { + // Do nothing. 
There are no sections +} + +HSAILSection::~HSAILSection() { + +} Index: lib/Target/HSAIL/HSAILSpecial.td =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSpecial.td @@ -0,0 +1,183 @@ +//===------------------------------------------------------*- tablegen -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//////////////////////////////////////////////////////////////////////////////// +// special operations + +let isAsCheapAsAMove = 1 in { + let isReMaterializable = 1 in { + def CURRENTWORKGROUPSIZE_U32 : HSAILInstBasic_1Op<"currentworkgroupsize", BrigOpcode.CURRENTWORKGROUPSIZE, Inst_U32_U32>; + } + + def CURRENTWORKITEMFLATID_U32 : HSAILInstBasic_0Op<"currentworkitemflatid", BrigOpcode.CURRENTWORKITEMFLATID, Inst_U32>; + def DIM_U32 : HSAILInstBasic_0Op<"dim", BrigOpcode.DIM, Inst_U32>; + + let isReMaterializable = 1 in { + def GRIDGROUPS_U32 : HSAILInstBasic_1Op<"gridgroups", BrigOpcode.GRIDGROUPS, Inst_U32_U32>; + defm GRIDSIZE : InstBasic_1Op_U32ImmSrc<"gridsize", BrigOpcode.GRIDSIZE>; + } + + def PACKETID_U64 : HSAILInstBasic_0Op<"packetid", BrigOpcode.PACKETID, Inst_U64>; + + let isReMaterializable = 1 in { + def WORKGROUPID_U32 : HSAILInstBasic_1Op<"workgroupid", BrigOpcode.WORKGROUPID, Inst_U32_U32>; + def WORKGROUPSIZE_U32 : HSAILInstBasic_1Op<"workgroupsize", BrigOpcode.WORKGROUPSIZE, Inst_U32_U32>; + defm WORKITEMABSID : InstBasic_1Op_U32ImmSrc<"workitemabsid", BrigOpcode.WORKITEMABSID>; + } + + defm WORKITEMFLATABSID : InstBasic_0Op_UnsignedIntTypes<"workitemflatabsid", BrigOpcode.WORKITEMFLATABSID>; + def WORKITEMFLATID_U32 : HSAILInstBasic_0Op<"workitemflatid", BrigOpcode.WORKITEMFLATID, Inst_U32>; + + let isReMaterializable = 1 in { + def WORKITEMID_U32 : HSAILInstBasic_1Op<"workitemid", BrigOpcode.WORKITEMID, Inst_U32_U32>; + } + + let hasSideEffects = 1 in { + def CLOCK_U64 : HSAILInstBasic_0Op<"clock", BrigOpcode.CLOCK, Inst_U64>; + def CUID_U32 : HSAILInstBasic_0Op<"cuid", BrigOpcode.CUID, Inst_U32>; + } + + def LANEID_U32 : HSAILInstBasic_0Op<"laneid", BrigOpcode.LANEID, Inst_U32>; + def MAXCUID_U32 : HSAILInstBasic_0Op<"maxcuid", BrigOpcode.MAXCUID, Inst_U32>; + def MAXWAVEID_U32 : HSAILInstBasic_0Op<"maxwaveid", BrigOpcode.MAXWAVEID, Inst_U32>; + def WAVEID_U32 : HSAILInstBasic_0Op<"waveid", BrigOpcode.WAVEID, Inst_U32>; +} + + +def : InstBasic_1Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_1Op_Pat; +def : GridSize_Pat; +def : GridSize_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : GridSize_Pat; +def : GridSize_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_1Op_Pat; + + +// Legacy intrinsic names +def : InstBasic_1Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_1Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_1Op_Pat; + + +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; + + +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; + + +let isNotDuplicable = 1, isBarrier = 
1 in { + def BARRIER : HSAILInstBr_0Op_NoRet<"barrier", BrigOpcode.BARRIER> { + let WidthAttr = WidthAttrValues.ALL; + } + + def WAVEBARRIER : HSAILInstBr_0Op_NoRet<"wavebarrier", BrigOpcode.WAVEBARRIER> { + let WidthAttr = WidthAttrValues.WAVESIZE; + } +} + +def : InstBr_0Op_NoRet_Intrin_Pat; + +def : InstBr_0Op_NoRet_Pat; +def : InstBr_0Op_NoRet_Pat; +def : InstBr_0Op_NoRet_Pat; + + +//////////////////////////////////////////////////////////////////////////////// +// SYNC OCL 2.0 + +def MEMFENCE : InstMemFence<"memfence", BrigOpcode.MEMFENCE>; + +def : Pat< + (int_HSAIL_memfence (i32 imm:$order), (i32 imm:$scope)), + (MEMFENCE imm:$order, imm:$scope) +>; + +def : Pat< + (int_hsail_memfence (i32 imm:$order), (i32 imm:$scope)), + (MEMFENCE imm:$order, imm:$scope) +>; + + +def IMAGEFENCE : HSAILInstBasic_0Op_NoRet<"imagefence", BrigOpcode.IMAGEFENCE, Inst_Void>; +def : InstBasic_0Op_NoRet_Pat; + + +defm ACTIVELANEPERMUTE : + InstLane_ActiveLanePermute_Types<"activelanepermute", + BrigOpcode.ACTIVELANEPERMUTE>; + +def : ActiveLanePermutePat; +def : ActiveLanePermutePat; + +def ACTIVELANEID_U32 : HSAILInstLane_0Op<"activelaneid", BrigOpcode.ACTIVELANEID, Inst_U32>; +def : ActiveLaneIdPat; + +def ACTIVELANECOUNT_U32_B1 : HSAILInstLane_1Op<"activelanecount", BrigOpcode.ACTIVELANECOUNT, Inst_U32_B1>; +def : ActiveLaneCountPat; + +let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + defm NULLPTR : InstSeg_0Op_PtrTypes<"nullptr", BrigOpcode.NULLPTR>; +} + +def : InstSeg_0Op_Pat; +def : InstSeg_0Op_Pat; + +let Predicates = [LargeModel] in { + def : InstSeg_0Op_Pat; // Same as flat. + def : InstSeg_0Op_Pat; + def : InstSeg_0Op_Pat; // Same as flat. + def : InstSeg_0Op_Pat; +} + +let Predicates = [SmallModel] in { + def : InstSeg_0Op_Pat; // Same as flat. + def : InstSeg_0Op_Pat; + def : InstSeg_0Op_Pat; // Same as flat. + def : InstSeg_0Op_Pat; +} + +let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + def GROUPBASEPTR_U32 : HSAILInstBasic_0Op<"groupbaseptr", BrigOpcode.GROUPBASEPTR, Inst_U32>; + defm KERNARGBASEPTR : InstBasic_0Op_UnsignedIntTypes<"kernargbaseptr", BrigOpcode.KERNARGBASEPTR>; + +} + +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; +def : InstBasic_0Op_Pat; + +// Cross-Lane Operations +def ACTIVELANEMASK_V4_B64_B1 : HSAILInstLane_ActiveLaneMask<"activelanemask", BrigOpcode.ACTIVELANEMASK>; + Index: lib/Target/HSAIL/HSAILStoreInitializer.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILStoreInitializer.h @@ -0,0 +1,81 @@ +//===-- HSAILStoreInitializer.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILSTOREINITIALIZER_H +#define LLVM_LIB_TARGET_HSAIL_HSAILSTOREINITIALIZER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/EndianStream.h" + +#include + +namespace llvm { + +class AsmPrinter; +class APInt; +class Constant; +class DataLayout; +class GlobalValue; +class MCExpr; +class StringRef; +class Type; + +class StoreInitializer { +public: + // Track offset wher the the address of a global needs to be inserted. 
+ struct VarInitOffset { + uint64_t BaseOffset; + const MCExpr *Expr; + + VarInitOffset(uint64_t Offset, const MCExpr *E) + : BaseOffset(Offset), Expr(E) {} + }; + +private: + const DataLayout &DL; + AsmPrinter &AP; + uint32_t InitEltSize; + bool IsFPElt; + + SmallString<1024> m_data; + raw_svector_ostream OS; + support::endian::Writer LE; + + std::vector VarInitAddresses; + + void initVarWithAddress(const GlobalValue *GV, StringRef Var, + const APInt &Offset); + + void printFloat(uint32_t, raw_ostream &O); + void printDouble(uint64_t, raw_ostream &O); + +public: + StoreInitializer(Type *EltTy, AsmPrinter &AP); + + void append(const Constant *CV, StringRef Var); + + ArrayRef varInitAddresses() const { + return makeArrayRef(VarInitAddresses); + } + + StringRef str() { return OS.str(); } + + size_t elementCount() { return dataSizeInBytes() / InitEltSize; } + + size_t dataSizeInBytes() { + // Be sure to flush the stream before computing the size. + return OS.str().size(); + } + + void print(raw_ostream &O); +}; +} + +#endif Index: lib/Target/HSAIL/HSAILStoreInitializer.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILStoreInitializer.cpp @@ -0,0 +1,266 @@ +//===-- HSAILStoreInitializer.cpp -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILStoreInitializer.h" + +#include "HSAILAsmPrinter.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +StoreInitializer::StoreInitializer(Type *EltTy, AsmPrinter &AP) + : DL(AP.getDataLayout()), AP(AP), InitEltSize(DL.getTypeAllocSize(EltTy)), + IsFPElt(EltTy->isFloatingPointTy()), m_data(), OS(m_data), LE(OS) {} + +void StoreInitializer::append(const Constant *CV, StringRef Var) { + switch (CV->getValueID()) { + case Value::ConstantArrayVal: { // Recursive type. + const ConstantArray *CA = cast(CV); + for (unsigned I = 0, E = CA->getNumOperands(); I < E; ++I) + append(cast(CA->getOperand(I)), Var); + + break; + } + case Value::ConstantDataArrayVal: { + const ConstantDataArray *CVE = cast(CV); + for (unsigned I = 0, E = CVE->getNumElements(); I < E; ++I) + append(cast(CVE->getElementAsConstant(I)), Var); + + break; + } + case Value::ConstantStructVal: { // Recursive type. + const ConstantStruct *S = cast(CV); + StructType *ST = S->getType(); + const StructLayout *SL = DL.getStructLayout(ST); + + uint64_t StructSize = DL.getTypeAllocSize(ST); + uint64_t BaseOffset = SL->getElementOffset(0); + + for (unsigned I = 0, E = S->getNumOperands(); I < E; ++I) { + Constant *Elt = cast(S->getOperand(I)); + append(Elt, Var); + + uint64_t EltSize = DL.getTypeAllocSize(Elt->getType()); + uint64_t EltOffset = SL->getElementOffset(I); + + uint64_t PaddedEltSize; + if (I == E - 1) + PaddedEltSize = BaseOffset + StructSize - EltOffset; + else + PaddedEltSize = SL->getElementOffset(I + 1) - EltOffset; + + // Match structure layout by padding with zeroes. + while (EltSize < PaddedEltSize) { + LE.write(static_cast(0)); + ++EltSize; + } + } + break; + } + case Value::ConstantVectorVal: { // Almost leaf type. 
+ const ConstantVector *CVE = cast(CV); + VectorType *Ty = CVE->getType(); + Type *EltTy = Ty->getElementType(); + unsigned NElts = Ty->getNumElements(); + unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy); + + unsigned I; + for (I = 0; I < NElts; ++I) + append(cast(CVE->getOperand(I)), Var); + + Constant *Zero = Constant::getNullValue(EltTy); + while (I < RealNElts) { + append(Zero, Var); + ++I; + } + + break; + } + case Value::ConstantDataVectorVal: { + const ConstantDataVector *CVE = cast(CV); + VectorType *Ty = CVE->getType(); + Type *EltTy = Ty->getElementType(); + unsigned NElts = Ty->getNumElements(); + unsigned RealNElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy); + + unsigned I; + for (I = 0; I < NElts; ++I) + append(cast(CVE->getElementAsConstant(I)), Var); + + Constant *Zero = Constant::getNullValue(EltTy); + while (I < RealNElts) { + append(Zero, Var); + ++I; + } + + break; + } + case Value::ConstantIntVal: { + const ConstantInt *CI = cast(CV); + if (CI->getType()->isIntegerTy(1)) { + LE.write(static_cast(CI->getZExtValue() ? 1 : 0)); + } else { + switch (CI->getBitWidth()) { + case 8: + LE.write(static_cast(CI->getZExtValue())); + break; + case 16: + LE.write(static_cast(CI->getZExtValue())); + break; + case 32: + LE.write(static_cast(CI->getZExtValue())); + break; + case 64: + LE.write(static_cast(CI->getZExtValue())); + break; + } + } + break; + } + case Value::ConstantFPVal: { + const ConstantFP *CFP = cast(CV); + if (CFP->getType()->isFloatTy()) + LE.write(CFP->getValueAPF().convertToFloat()); + else if (CFP->getType()->isDoubleTy()) + LE.write(CFP->getValueAPF().convertToDouble()); + else + llvm_unreachable("unhandled ConstantFP type"); + break; + } + case Value::ConstantPointerNullVal: { + unsigned AS = CV->getType()->getPointerAddressSpace(); + if (DL.getPointerSize(AS) == 8) + LE.write(static_cast(0)); + else + LE.write(static_cast(0)); + break; + } + case Value::UndefValueVal: + case Value::ConstantAggregateZeroVal: { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + for (uint64_t I = 0; I < Size / InitEltSize; ++I) { + switch (InitEltSize) { + case 1: + LE.write(static_cast(0)); + break; + case 2: + LE.write(static_cast(0)); + break; + case 4: + LE.write(static_cast(0)); + break; + case 8: + LE.write(static_cast(0)); + break; + default: + llvm_unreachable("unhandled size"); + } + } + + break; + } + case Value::GlobalVariableVal: + case Value::ConstantExprVal: { + const MCExpr *Expr = AP.lowerConstant(CV); + + // Offset that address needs to be written at is the current size of the + // buffer. 
+ uint64_t CurrOffset = dataSizeInBytes(); + + unsigned Size = DL.getTypeAllocSize(CV->getType()); + switch (Size) { + case 4: + LE.write(static_cast(0)); + break; + case 8: + LE.write(static_cast(0)); + break; + default: + llvm_unreachable("unhandled size"); + } + + VarInitAddresses.emplace_back(CurrOffset, Expr); + break; + } + default: + llvm_unreachable("unhandled initializer"); + } +} + +// FIXME: Duplicated in HSAILAsmPrinter +void StoreInitializer::printFloat(uint32_t Val, raw_ostream &O) { + O << format("0F%08" PRIx32, Val); +} + +void StoreInitializer::printDouble(uint64_t Val, raw_ostream &O) { + O << format("0D%016" PRIx64, Val); +} + +void StoreInitializer::print(raw_ostream &O) { + StringRef Str = str(); + assert(Str.size() % InitEltSize == 0); + + if (InitEltSize == 1) { + for (size_t I = 0, E = Str.size(); I != E; ++I) { + if (I != 0) + O << ", "; + + O << (static_cast(Str[I]) & 0xff); + } + + return; + } + + for (unsigned I = 0, E = Str.size(); I != E; I += InitEltSize) { + if (I != 0) + O << ", "; + + const char *Ptr = &Str.data()[I]; + switch (InitEltSize) { + case 4: { + uint32_t Tmp; + std::memcpy(&Tmp, Ptr, 4); + + if (IsFPElt) + printFloat(Tmp, O); + else + O << Tmp; + break; + } + case 8: { + uint64_t Tmp; + std::memcpy(&Tmp, Ptr, 8); + + if (IsFPElt) + printDouble(Tmp, O); + else + O << Tmp; + break; + } + case 2: { + uint16_t Tmp; + std::memcpy(&Tmp, Ptr, 2); + + assert(!IsFPElt && "half not implemented"); + O << Tmp; + break; + } + default: + llvm_unreachable("Unhandled element size"); + } + } +} Index: lib/Target/HSAIL/HSAILSubtarget.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSubtarget.h @@ -0,0 +1,173 @@ +//===---- HSAILSubtarget.h - Define Subtarget for the HSAIL -----*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the HSAIL specific subclass of TargetSubtarget. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILSUBTARGET_H +#define LLVM_LIB_TARGET_HSAIL_HSAILSUBTARGET_H + +#include "HSAIL.h" +#include "HSAILFrameLowering.h" +#include "HSAILInstrInfo.h" +#include "HSAILISelLowering.h" + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define GET_SUBTARGETINFO_HEADER +#include "HSAILGenSubtargetInfo.inc" + +namespace llvm { +class HSAILTargetMachine; + +class HSAILSamplerHandle { +private: + std::string mSym; + unsigned int mVal; + bool mIsRO; + bool mEmitted; + +public: + HSAILSamplerHandle(HSAILSamplerHandle ©) { + mSym = copy.mSym; + mVal = copy.mVal; + mIsRO = copy.mIsRO; + mEmitted = copy.mEmitted; + } + + HSAILSamplerHandle(/*bool isImage, */ const char *sym) { + mSym = sym; + mVal = 0; + mIsRO = false; + mEmitted = false; + } + + HSAILSamplerHandle(/*bool isImage, */ unsigned int u) { + mVal = u; + mIsRO = false; + mEmitted = false; + } + + inline void setSym(std::string str) { mSym = str; } + inline std::string getSym() { return mSym; } + inline unsigned int getVal() { return mVal; } + inline bool isRO() { return mIsRO; } + inline void setRO() { mIsRO = true; } + inline bool isEmitted() { return mEmitted; } + inline void setEmitted() { mEmitted = true; } +}; + +class HSAILImageHandles { +private: + // Image and sampler kernel args + SmallVector HSAILImageArgs; + + // Sampler initializers + SmallVector HSAILSamplers; + unsigned index; + +public: + HSAILImageHandles() { index = 0; } + // TODO_HSA Add a destructor + + SmallVector getSamplerHandles() { + return HSAILSamplers; + } + HSAILSamplerHandle *getSamplerHandle(unsigned index); + + unsigned findOrCreateImageHandle(const char *sym); + unsigned findOrCreateSamplerHandle(unsigned int u); + + std::string getImageSymbol(unsigned index); + std::string getSamplerSymbol(unsigned index); + unsigned getSamplerValue(unsigned index); + + bool isSamplerSym(std::string sym); + + void finalize(); + void clearImageArgs(); +}; + +class HSAILKernelManager; + +class HSAILSubtarget : public HSAILGenSubtargetInfo { +protected: + Triple TargetTriple; + std::string DevName; + bool IsLargeModel; + bool HasImages; + bool IsGCN; + + HSAILFrameLowering FrameLowering; + std::unique_ptr TLInfo; + std::unique_ptr InstrInfo; + + // FIXME: It makes no sense for this to be here. + HSAILImageHandles *imageHandles; + +public: + /// This constructor initializes the data members to match that + /// of the specified triple. 
+ /// + + HSAILSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + HSAILTargetMachine &TM); + HSAILSubtarget &initializeSubtargetDependencies(StringRef GPU, StringRef FS); + + const HSAILRegisterInfo *getRegisterInfo() const override { + return &getInstrInfo()->getRegisterInfo(); + } + + const HSAILInstrInfo *getInstrInfo() const override { + return InstrInfo.get(); + } + + const HSAILTargetLowering *getTargetLowering() const override { + return TLInfo.get(); + } + + const HSAILFrameLowering *getFrameLowering() const override { + return &FrameLowering; + } + + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + bool isFullProfile() const { + return true; + } + + bool isLargeModel() const { + return IsLargeModel; + } + + bool isSmallModel() const { + return !IsLargeModel; + } + + bool hasImages() const { + return HasImages; + } + + bool isGCN() const { + return IsGCN; + } + + // FIXME: Remove this + bool supportMetadata30() const { return true; } + + StringRef getDeviceName() const { return DevName; } + + HSAILImageHandles *getImageHandles() const { return imageHandles; } +}; + +} // End llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILSubtarget.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILSubtarget.cpp @@ -0,0 +1,117 @@ +//===------ HSAILSubtarget.cpp - Define Subtarget for the HSAIL -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the HSAIL specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#include "HSAILSubtarget.h" + +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "subtarget" + +#define GET_SUBTARGETINFO_CTOR +#define GET_SUBTARGETINFO_TARGET_DESC +#include "HSAILGenSubtargetInfo.inc" + +using namespace llvm; + +HSAILSubtarget::HSAILSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + HSAILTargetMachine &TM) + : HSAILGenSubtargetInfo(TT, CPU, FS), TargetTriple(TT), + DevName(CPU.empty() ? "generic" : CPU.str()), + IsLargeModel(TargetTriple.getArch() == Triple::hsail64), + HasImages(false), + IsGCN(false), + FrameLowering(TargetFrameLowering::StackGrowsUp, 16, 0), TLInfo(), + InstrInfo(), imageHandles(new HSAILImageHandles()) { + initializeSubtargetDependencies(CPU, FS); + + InstrInfo.reset(new HSAILInstrInfo(*this)); + TLInfo.reset(new HSAILTargetLowering(TM, *this)); +} + +HSAILSubtarget &HSAILSubtarget::initializeSubtargetDependencies(StringRef GPU, + StringRef FS) { + ParseSubtargetFeatures(GPU, FS); + return *this; +} + +// +// Support for processing Image and Sampler kernel args and operands. 
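+// Handles are deduplicated by value: requesting the same image symbol or the
+// same sampler initializer twice returns the index of the existing entry, so
+// each unique image or sampler is recorded only once.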
+// +unsigned HSAILImageHandles::findOrCreateImageHandle(const char *sym) { + // Check for image arg with same value already present + std::string symStr = sym; + for (unsigned i = 0; i < HSAILImageArgs.size(); i++) { + if (HSAILImageArgs[i] == symStr) { + return i; + } + } + HSAILImageArgs.push_back(symStr); + return HSAILImageArgs.size() - 1; +} + +unsigned HSAILImageHandles::findOrCreateSamplerHandle(unsigned int u) { + // Check for handle with same value already present + for (unsigned i = 0; i < HSAILSamplers.size(); i++) { + if (getSamplerValue(i) == u) { + return i; + } + } + HSAILSamplerHandle *handle = new HSAILSamplerHandle(u); + HSAILSamplers.push_back(handle); + return HSAILSamplers.size() - 1; +} + +HSAILSamplerHandle *HSAILImageHandles::getSamplerHandle(unsigned index) { + assert(index < HSAILSamplers.size() && "Invalid sampler index"); + return HSAILSamplers[index]; +} + +std::string HSAILImageHandles::getImageSymbol(unsigned index) { + assert(index < HSAILImageArgs.size() && "Invalid image arg index"); + return HSAILImageArgs[index]; +} + +std::string HSAILImageHandles::getSamplerSymbol(unsigned index) { + assert(index < HSAILSamplers.size() && "Invalid sampler index"); + return HSAILSamplers[index]->getSym(); +} + +unsigned HSAILImageHandles::getSamplerValue(unsigned index) { + assert(index < HSAILSamplers.size() && "Invalid sampler index"); + return HSAILSamplers[index]->getVal(); +} + +bool HSAILImageHandles::isSamplerSym(std::string sym) { + for (unsigned i = 0; i < HSAILSamplers.size(); i++) { + if (getSamplerSymbol(i) == sym) { + return true; + } + } + return false; +} + +void HSAILImageHandles::finalize() { + // printf("ImageHandles before finalize\n"); + // dump(); + char buf[16]; + for (unsigned i = 0; i < HSAILSamplers.size(); i++) { + if (getSamplerSymbol(i).empty()) { + sprintf(buf, "%s%u", "__Samp", index); + HSAILSamplers[i]->setSym(buf); + index++; + } + } +} + +void HSAILImageHandles::clearImageArgs() { HSAILImageArgs.clear(); } Index: lib/Target/HSAIL/HSAILTargetMachine.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILTargetMachine.h @@ -0,0 +1,101 @@ +//=-- HSAILTargetMachine.h - Define TargetMachine for the HSAIL ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the HSAIL specific subclass of TargetMachine. 
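+// Both the 32-bit (hsail) and 64-bit (hsail64) target machine variants are
+// declared here.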
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILTARGETMACHINE_H +#define LLVM_LIB_TARGET_HSAIL_HSAILTARGETMACHINE_H + +#include "HSAILIntrinsicInfo.h" +#include "HSAILSubtarget.h" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +class HSAILTargetMachine : public LLVMTargetMachine { +private: + HSAILSubtarget Subtarget; + HSAILIntrinsicInfo IntrinsicInfo; + TargetLoweringObjectFile *TLOF; + +public: +public: + HSAILTargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions & + Options, Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + const HSAILIntrinsicInfo *getIntrinsicInfo() const override { + return &IntrinsicInfo; + } + + const HSAILSubtarget *getSubtargetImpl() const { + return &Subtarget; + } + + const HSAILSubtarget *getSubtargetImpl(const Function &) const override { + return &Subtarget; + } + + TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF; } + + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + + CodeGenFileType HSAILFileType; + +public: + bool addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, + CodeGenFileType FT, bool DisableVerify = true, + AnalysisID StartBefore = nullptr, + AnalysisID StartAfter = nullptr, + AnalysisID StopAfter = nullptr, + MachineFunctionInitializer *MFInitializer = nullptr) override; +}; + +class HSAIL_32TargetMachine : public HSAILTargetMachine { +public: + HSAIL_32TargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + + void dump(raw_ostream &O); +}; + +class HSAIL_64TargetMachine : public HSAILTargetMachine { +public: + HSAIL_64TargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); +}; + +class HSAILPassConfig : public TargetPassConfig { +public: + HSAILPassConfig(HSAILTargetMachine *TM, PassManagerBase &PM) + : TargetPassConfig(TM, PM) {} + + // Pass Pipeline Configuration + void addIRPasses() override; + void addPreEmitPass() override; + bool addPreISel() override; + bool addInstSelector() override; + void addPreRegAlloc() override; + void addPostRegAlloc() override; +}; +} // End llvm namespace + +#endif Index: lib/Target/HSAIL/HSAILTargetMachine.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILTargetMachine.cpp @@ -0,0 +1,168 @@ +//===-- HSAILTargetMachine.cpp - Define TargetMachine for the HSAIL -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the HSAIL specific subclass of TargetMachine. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "HSAILTargetMachine.h"
+#include "HSAILELFTargetObjectFile.h"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeHSAILTarget() {
+  // Register the target.
+  RegisterTargetMachine<HSAIL_32TargetMachine> X(TheHSAIL_32Target);
+  RegisterTargetMachine<HSAIL_64TargetMachine> Y(TheHSAIL_64Target);
+}
+
+extern "C" void LLVMInitializeBRIGAsmPrinter();
+
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+  return new HSAILTargetObjectFile();
+}
+
+static StringRef computeDataLayout(const Triple &T) {
+  if (T.getArch() == Triple::hsail64) {
+    return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
+           "-p6:32:32-p7:64:64-p8:32:32-p9:64:64-i64:64"
+           "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+           "-v512:512-v1024:1024-v2048:2048"
+           "-n32:64";
+  }
+
+  return "e-p:32:32-i64:64"
+         "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
+         "-v512:512-v1024:1024-v2048:2048"
+         "-n32:64";
+}
+
+// Hack to prevent weird standard OS directives from being printed when the
+// triple is not fully specified. e.g. on an OS X host, there is no other way
+// to disable printing .macosx_version_min at the start of the module.
+LLVM_READONLY
+static Triple getTripleNoOS(Triple TT) {
+  if (TT.getOS() != Triple::UnknownOS)
+    TT.setOS(Triple::UnknownOS);
+
+  return TT;
+}
+
+HSAILTargetMachine::HSAILTargetMachine(const Target &T, const Triple &TT,
+                                       StringRef CPU, StringRef FS,
+                                       const TargetOptions &Options,
+                                       Reloc::Model RM, CodeModel::Model CM,
+                                       CodeGenOpt::Level OL)
+    : LLVMTargetMachine(T, computeDataLayout(getTripleNoOS(TT)),
+                        getTripleNoOS(TT), CPU, FS, Options, RM, CM, OL),
+      Subtarget(getTripleNoOS(TT), CPU, FS, *this), IntrinsicInfo(this),
+      TLOF(createTLOF(getTripleNoOS(TT))) {
+  initAsmInfo();
+}
+
+bool HSAILTargetMachine::addPassesToEmitFile(
+    PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FT,
+    bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
+    AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) {
+  return LLVMTargetMachine::addPassesToEmitFile(PM, Out, FT, DisableVerify,
+                                                StartBefore, StartAfter,
+                                                StopAfter, MFInitializer);
+}
+
+TargetPassConfig *HSAILTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new HSAILPassConfig(this, PM);
+}
+
+void HSAILPassConfig::addIRPasses() {
+  addPass(createHSAILAlwaysInlinePass());
+  addPass(createAlwaysInlinerPass());
+
+  // AddrSpaceCast optimization and lowering. Add dead code elimination
+  // to eliminate dead instructions (AddrSpaceCast, etc.).
+  TargetPassConfig::addIRPasses();
+}
+
+bool HSAILPassConfig::addPreISel() {
+  addPass(createLCSSAPass()); // Required by early CFG opts
+
+  return true;
+}
+
+bool HSAILPassConfig::addInstSelector() {
+  HSAILTargetMachine &HSATM = getTM<HSAILTargetMachine>();
+  // return
+  // HSAILTargetMachine::addInstSelector(*PM,HSATM.Options,HSATM.getOptLevel());
+  // mOptLevel = OptLevel;
+  // Install an instruction selector.
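+  // Only the DAG instruction selector is installed here; the machine-level
+  // cleanup passes below remain disabled under #if 0 for now.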
+ + addPass(createHSAILISelDag(HSATM)); + +#if 0 + addPass(&DeadMachineInstructionElimID); + if (EnableUniformOps) { + addPass(createHSAILUniformOperations(HSATM)); + } +#endif + + return false; +} +void HSAILPassConfig::addPreEmitPass() { } +void HSAILPassConfig::addPreRegAlloc() { } +void HSAILPassConfig::addPostRegAlloc() { } + +//===----------------------------------------------------------------------===// +// HSAIL_32Machine functions +//===----------------------------------------------------------------------===// +HSAIL_32TargetMachine::HSAIL_32TargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : HSAILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { + // Check for mismatch in target triple settings and data layout. Note the + // target + // triple comes from the module (unless overridden on command line). It's just + // a + // warning, but users should know if they're specifying --march=hsail-64 on a + // 32-bit module or --march=hsail on a 64-bit module. + if (TT.getArch() == Triple::hsail64) { + errs() << "warning: target triple '" << TT.str() + << "' does not match target 'hsail', expecting hsail-pc-amdopencl.\n"; + } +} + +//===----------------------------------------------------------------------===// +// HSAIL_64Machine functions +//===----------------------------------------------------------------------===// +HSAIL_64TargetMachine::HSAIL_64TargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : HSAILTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { + // Check for mismatch in target triple settings and data layout. Note the + // target + // triple comes from the module (unless overridden on command line). It's just + // a + // warning, but users should know if they're specifying --march=hsail-64 on a + // 32-bit module. + if (TT.getArch() == Triple::hsail) { + errs() << "warning: target triple '" << TT.str() + << "' does not match target 'hsail64', " + "expecting hsail64pc-amdopencl.\n"; + } +} Index: lib/Target/HSAIL/HSAILUtil/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILUtil/CMakeLists.txt @@ -0,0 +1,2 @@ +include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}/..) +add_llvm_library(LLVMHSAILUtil HSAILUtil.cpp) Index: lib/Target/HSAIL/HSAILUtil/HSAILUtil.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILUtil/HSAILUtil.cpp @@ -0,0 +1,18 @@ +//===-- HSAILUtil.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is just to split getNamedOperandIdx out from the rest of the target +// library to avoid the HSAILAsmPrinter library depending on it. 
+ +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include "../MCTargetDesc/HSAILMCTargetDesc.h" + +#define GET_INSTRINFO_NAMED_OPS +#include "HSAILGenInstrInfo.inc" Index: lib/Target/HSAIL/HSAILUtilityFunctions.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILUtilityFunctions.h @@ -0,0 +1,62 @@ +//===-- HSAILUtilityFunctions.h - Utility Functions Header ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// +// +// This file provides declarations for functions that are used across different +// classes and provide various conversions or utility to shorten the code +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_HSAILUTILITYFUNCTIONS_H +#define LLVM_LIB_TARGET_HSAIL_HSAILUTILITYFUNCTIONS_H + +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILTargetMachine.h" + +namespace llvm { +class GlobalVariable; +class HSAILMachineFunctionInfo; +class MachineInstr; +class TargetRegisterClass; +class Type; + +namespace HSAIL { + +BrigType getBrigType(Type *Ty, const DataLayout &DL, bool Signed = false); + +/// Returns the type to use when expressing the type in HSAIL. If this will be +/// expressed as an HSAIL array, set NElts to the number of elements, +/// otherwise 0. +Type *analyzeType(Type *Ty, unsigned &NElts, const DataLayout &DL); + +unsigned getAlignTypeQualifier(Type *ty, const DataLayout &DL, + bool isPreferred); + +static inline bool isConv(const MachineInstr *MI) { + return MI->getDesc().TSFlags & HSAILInstrFlags::IS_CONV; +} + +static inline bool isImageInst(const MachineInstr *MI) { + return MI->getDesc().TSFlags & HSAILInstrFlags::IS_IMAGEINST; +} + +bool isKernelFunc(const Function *F); +bool isSPIRModule(const Module &M); + +bool notUsedInKernel(const GlobalVariable *GV); +bool isIgnoredGV(const GlobalVariable *GV); + +bool sanitizedGlobalValueName(StringRef, SmallVectorImpl &); +bool sanitizeGlobalValueName(llvm::GlobalValue *GV); + +} // End namespace HSAIL + +} // End namespace llvm + +#endif Index: lib/Target/HSAIL/HSAILUtilityFunctions.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/HSAILUtilityFunctions.cpp @@ -0,0 +1,330 @@ +//===-- HSAILUtilityFunctions.cpp - HSAIL Utility Functions ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "HSAILUtilityFunctions.h"
+#include "HSAILInstrInfo.h"
+#include "HSAILISelLowering.h"
+#include "HSAILMachineFunctionInfo.h"
+#include "HSAILOpaqueTypes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Debug.h"
+#include
+#include
+#include
+
+using namespace llvm;
+
+namespace llvm {
+
+namespace HSAIL {
+
+BrigType getBrigType(Type *type, const DataLayout &DL, bool Signed) {
+  switch (type->getTypeID()) {
+  case Type::VoidTyID:
+    return BRIG_TYPE_NONE; // TODO_HSA: FIXME: void
+  case Type::FloatTyID:
+    return BRIG_TYPE_F32;
+  case Type::DoubleTyID:
+    return BRIG_TYPE_F64;
+  case Type::IntegerTyID:
+    if (type->isIntegerTy(8)) {
+      return Signed ? BRIG_TYPE_S8 : BRIG_TYPE_U8;
+    } else if (type->isIntegerTy(16)) {
+      return Signed ? BRIG_TYPE_S16 : BRIG_TYPE_U16;
+    } else if (type->isIntegerTy(32)) {
+      return Signed ? BRIG_TYPE_S32 : BRIG_TYPE_U32;
+    } else if (type->isIntegerTy(64)) {
+      return Signed ? BRIG_TYPE_S64 : BRIG_TYPE_U64;
+    } else if (type->isIntegerTy(1)) {
+      return BRIG_TYPE_B1;
+    } else
+      llvm_unreachable("Unhandled type");
+    break;
+  case Type::PointerTyID: {
+    if (OpaqueType OT = GetOpaqueType(type)) {
+      if (IsImage(OT))
+        return BRIG_TYPE_RWIMG;
+      if (OT == Sampler)
+        return BRIG_TYPE_SAMP;
+    }
+    unsigned AS = cast<PointerType>(type)->getAddressSpace();
+    return DL.getPointerSize(AS) == 8 ? BRIG_TYPE_U64 : BRIG_TYPE_U32;
+  }
+  case Type::StructTyID:
+    // Treat struct as array of bytes.
+    return BRIG_TYPE_U8_ARRAY;
+  case Type::VectorTyID:
+    return static_cast<BrigType>(
+        getBrigType(type->getScalarType(), DL, Signed) | BRIG_TYPE_ARRAY);
+  case Type::ArrayTyID:
+    return static_cast<BrigType>(
+        getBrigType(cast<ArrayType>(type)->getElementType(), DL, Signed) |
+        BRIG_TYPE_ARRAY);
+  default:
+    type->dump();
+    llvm_unreachable("Unhandled type");
+  }
+}
+
+Type *analyzeType(Type *Ty, unsigned &NElts, const DataLayout &DL) {
+  // Scan through levels of nested arrays until we get to something that can't
+  // be expressed as a simple array element.
+  if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+    Type *EltTy = nullptr;
+    NElts = 1;
+
+    while (AT) {
+      NElts *= AT->getNumElements();
+      EltTy = AT->getElementType();
+      AT = dyn_cast<ArrayType>(EltTy);
+    }
+
+    unsigned EltElts = ~0u;
+
+    // We could have arrays of vectors or structs.
+    Type *Tmp = analyzeType(EltTy, EltElts, DL);
+
+    // We only need to multiply if this was a nested vector type.
+    if (EltElts != 0)
+      NElts *= EltElts;
+
+    return Tmp;
+  }
+
+  if (VectorType *VT = dyn_cast<VectorType>(Ty)) {
+    Type *EltTy = VT->getElementType();
+
+    // We need to correct the number of elements in the case of 3x vectors
+    // since in memory they occupy 4 elements.
+    NElts = DL.getTypeAllocSize(Ty) / DL.getTypeAllocSize(EltTy);
+    assert(NElts >= VT->getNumElements());
+
+    // FIXME: It's not clear what the behavior of i1 vectors is supposed to be,
+    // and they aren't consistently handled.
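+    // For now they are simply rejected with a fatal error.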
+    if (EltTy->isIntegerTy(1))
+      report_fatal_error("i1 vector initializers not handled");
+
+    return EltTy;
+  }
+
+  if (isa<StructType>(Ty)) {
+    NElts = DL.getTypeAllocSize(Ty);
+    return Type::getInt8Ty(Ty->getContext());
+  }
+
+  assert(!Ty->isAggregateType());
+
+  NElts = 0;
+
+  // Arrays of i1 are not supported, and must be replaced with byte sized
+  // elements.
+  if (Ty->isIntegerTy(1))
+    return Type::getInt8Ty(Ty->getContext());
+
+  return Ty;
+}
+
+unsigned getAlignTypeQualifier(Type *ty, const DataLayout &DL,
+                               bool isPreferred) {
+  unsigned align = 0;
+
+  if (ArrayType *ATy = dyn_cast<ArrayType>(ty))
+    ty = ATy->getElementType();
+
+  if (IsImage(ty) || IsSampler(ty))
+    return 8;
+
+  align =
+      isPreferred ? DL.getPrefTypeAlignment(ty) : DL.getABITypeAlignment(ty);
+
+  unsigned max_align = (1 << (BRIG_ALIGNMENT_MAX - BRIG_ALIGNMENT_1));
+  if (align > max_align)
+    align = max_align;
+
+  assert(align && (align & (align - 1)) == 0);
+
+  return align;
+}
+
+static bool isKernelFunc(StringRef str) {
+  if (str.startswith("__OpenCL_") && str.endswith("_kernel"))
+    return true;
+  return false;
+}
+
+bool isKernelFunc(const Function *F) {
+  if (CallingConv::SPIR_KERNEL == F->getCallingConv())
+    return true;
+
+  return isKernelFunc(F->getName());
+}
+
+/// \brief Check if a global variable is used in any "real" code.
+///
+/// We iterate over the entire tree of users, looking for any use in
+/// the kernel code. The traversal ignores any use in metadata. There
+/// is only one way to use a global variable in metadata --- by using
+/// it in a global variable that occurs in the "llvm.metadata"
+/// section. (MDNode is not a subclass of llvm::User, and hence they
+/// can't occur in the user tree.) The traversal returns early if the
+/// user is an instruction.
+///
+/// Assumption: Instructions do not occur in metadata. Also, we don't
+/// worry about dead code so late in the flow.
+bool notUsedInKernel(const GlobalVariable *GV) {
+  SmallVector<const User *, 32> worklist; // arbitrary choice of 32
+
+  // We only inspect the users of GV, hence GV itself is never
+  // inserted in the worklist.
+  worklist.append(GV->user_begin(), GV->user_end());
+
+  while (!worklist.empty()) {
+    const User *user = worklist.pop_back_val();
+
+    if (const GlobalValue *GUser = dyn_cast<GlobalValue>(user)) {
+      if (std::string("llvm.metadata") == GUser->getSection())
+        continue;
+    }
+
+    if (isa<Instruction>(user))
+      return false;
+
+    worklist.append(user->user_begin(), user->user_end());
+  }
+
+  return true;
+}
+
+bool sanitizedGlobalValueName(StringRef Name, SmallVectorImpl<char> &Out) {
+  // Poor man's regexp check.
+  static const StringRef Syntax("abcdefghijklmnopqrstuvwxyz"
+                                "_."
+                                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                "0123456789");
+
+  static const StringRef FirstCharSyntax("abcdefghijklmnopqrstuvwxyz"
+                                         "_"
+                                         "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+
+  static const StringRef Digits("0123456789");
+
+  SmallString<32> NewName;
+
+  // The second character (after the prefix) of an identifier must be a
+  // letter or underscore.
+  if (FirstCharSyntax.find(Name[0]) == StringRef::npos) {
+    NewName += '_';
+    Name = Name.drop_front(1);
+  }
+
+  size_t p = 0;
+  size_t q = 0;
+
+  while (q != StringRef::npos) {
+    q = Name.find_first_not_of(Syntax, p);
+    // If q == p, the character at p itself violates the syntax.
+    if (q != p) {
+      // Consume everything before q, not including q (even if q == npos).
+      NewName += Name.slice(p, q);
+    }
+
+    // If not found, do not replace.
+    if (q == StringRef::npos)
+      break;
+
+    // Replace found character with underscore.
+ NewName += '_'; + + // Then we directly move on to the next character: skip q. + p = q + 1; + } + + // opt may generate empty names and names started with digit. + if (Name.empty() || Digits.find(Name[0]) != StringRef::npos || + !Name.equals(NewName)) { + // Add prefix to show that the name was replaced by HSA. + // LLVM's setName adds seq num in case of name duplicating. + Out.append(NewName.begin(), NewName.end()); + return true; + } + + return false; +} + +/// \brief Rename a global variable to satisfy HSAIL syntax. +/// +/// We simply drop all characters from the name that are disallowed by +/// HSAIL. When the resulting string is applied as a name, it will be +/// automatically modified to resolve conflicts. +bool sanitizeGlobalValueName(GlobalValue *GV) { + SmallString<256> NewName; + + if (sanitizedGlobalValueName(GV->getName(), NewName)) { + // Add prefix to show that the name was replaced by HSA. + // LLVM's setName adds seq num in case of name duplicating. + GV->setName(Twine("__hsa_replaced_") + Twine(NewName)); + return true; + } + + return false; +} + +bool isIgnoredGV(const GlobalVariable *GV) { + unsigned AS = GV->getType()->getAddressSpace(); + + if (AS == HSAILAS::PRIVATE_ADDRESS || AS == HSAILAS::GROUP_ADDRESS) + return true; + + if (GV->hasLocalLinkage() && notUsedInKernel(GV)) + return true; + + StringRef GVname = GV->getName(); + + // FIXME: Should be removed + return GVname.startswith("sgv") || GVname.startswith("fgv") || + GVname.startswith("lvgv") || GVname.startswith("pvgv") || + // TODO_HSA: suppress emitting annotations as global declarations for + // now. These are labelled as "llvm.metadata". How should we handle + // these? + GVname.startswith("llvm.argtypeconst.annotations") || + GVname.startswith("llvm.argtypename.annotations") || + GVname.startswith("llvm.constpointer.annotations") || + GVname.startswith("llvm.global.annotations") || + GVname.startswith("llvm.image.annotations") || + GVname.startswith("llvm.readonlypointer.annotations") || + GVname.startswith("llvm.restrictpointer.annotations") || + GVname.startswith("llvm.signedOrSignedpointee.annotations") || + GVname.startswith("llvm.volatilepointer.annotations") || + GVname.startswith("llvm.sampler.annotations"); +} + +/// \brief Check whether the module contains SPIR +/// +/// We cannot use metadata such as "opencl.spir.version", or the +/// target triple, because these can come in via the builtins library +/// as well. Instead we rely on the fact that there are no kernels in +/// the builtins library, and hence "opencl.kernels" is absent. +bool isSPIRModule(const Module &M) { + return M.getNamedMetadata("opencl.kernels"); +} + +} // End namespace HSAIL + +} // End namespace llvm Index: lib/Target/HSAIL/InstPrinter/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/InstPrinter/CMakeLists.txt @@ -0,0 +1,4 @@ +add_llvm_library(LLVMHSAILAsmPrinter + HSAILInstPrinter.cpp + ) +target_link_libraries(LLVMHSAILAsmPrinter PRIVATE LLVMHSAILUtil) \ No newline at end of file Index: lib/Target/HSAIL/InstPrinter/HSAILInstPrinter.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/InstPrinter/HSAILInstPrinter.h @@ -0,0 +1,170 @@ +//===-- HSAILInstPrinter.h - HSAIL MC Inst -> ASM interface -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_INSTPRINTER_HSAILINSTPRINTER_H +#define LLVM_LIB_TARGET_HSAIL_INSTPRINTER_HSAILINSTPRINTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + +class HSAILInstPrinter : public MCInstPrinter { +private: + bool InArgScope; + +public: + HSAILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI); + + // Autogenerated by tblgen + void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); + + void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + +private: + // void printU8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printB1(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printB8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printB16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printB32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printB64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printB128(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printSAMP(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printROIMG(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printWOIMG(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printRWIMG(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printSIG32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printSIG64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU8X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU8X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU8X16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU16X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU16X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU16X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printU32X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU32X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printU64X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS8X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS8X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS8X16(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS16X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS16X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printS32X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printS32X4(const MCInst 
*MI, unsigned OpNo, raw_ostream &O); + // void printS64X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF16X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF16X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printF16X8(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printF32X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printF32X4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + // void printF64X2(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printUImmediate(uint64_t Imm, raw_ostream &O); + void printSImmediate(int64_t Imm, raw_ostream &O); + void printAddrMode3Op(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printVec2Op(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printVec3Op(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printVec4Op(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printV2U32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV2F32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV2U64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV2F64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printV3U32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV3F32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV3U64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV3F64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printV4U32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV4F32(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV4U64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV4F64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printFTZ(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printNoNull(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printV4(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigAlignment(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printArgDeclAlignment(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printArraySize(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printEquiv(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigAllocation(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigAluModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigAtomicOperation(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigCompareOperation(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigControlDirective(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + void printBrigExecutableModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigImageChannelOrder(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigImageChannelType(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigImageGeometry(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigImageQuery(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigLinkage(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigMachineModel(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigMemoryModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigMemoryOrder(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigMemoryScope(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void 
printBrigPack(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigProfile(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printBrigRound(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigSamplerAddressing(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + void printBrigSamplerCoordNormalization(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + void printBrigSamplerFilter(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigSamplerQuery(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigSegCvtModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + void printBrigSegment(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigType(const MCInst *MI, unsigned OpNo, raw_ostream &O); + + void printBrigVariableModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O); + + void printBrigWidth(const MCInst *MI, unsigned OpNo, raw_ostream &O); +}; + +} // End namespace llvm + +#endif Index: lib/Target/HSAIL/InstPrinter/HSAILInstPrinter.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/InstPrinter/HSAILInstPrinter.cpp @@ -0,0 +1,1729 @@ +//===-- HSAILInstPrinter.cpp - HSAIL MC Inst -> ASM -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +//===----------------------------------------------------------------------===// + +#include "HSAILInstPrinter.h" +#include "HSAIL.h" +#include "HSAILBrigDefs.h" +#include "HSAILInstrInfo.h" + +#include "MCTargetDesc/HSAILMCTargetDesc.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +HSAILInstPrinter::HSAILInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, + const MCRegisterInfo &MRI) + : MCInstPrinter(MAI, MII, MRI), + InArgScope(false) {} + +void HSAILInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, const MCSubtargetInfo &STI) { + + if (MI->getOpcode() == HSAIL::ARG_SCOPE_START) { + InArgScope = true; + printInstruction(MI, OS); + printAnnotation(OS, Annot); + return; + } + + if (MI->getOpcode() == HSAIL::ARG_SCOPE_END) { + InArgScope = false; + printInstruction(MI, OS); + printAnnotation(OS, Annot); + return; + } + + // Indent any instructions in a call scope. + if (InArgScope) + OS << '\t'; + + printInstruction(MI, OS); + + // Special case call because there appears to be no way to handle variable_ops + // in the generated printer. + if (MI->getOpcode() == HSAIL::CALL) { + // First operand is called function, and should have been automatically + // printed. We just need to specially handle the variable_ops. + unsigned I = 1; + + OS << '('; + + const MCOperand *Op = &MI->getOperand(1); + while (!Op->isImm()) { + printOperand(MI, I++, OS); + Op = &MI->getOperand(I); + } + + // Return value and argument symbols are delimited with a 0 value. + assert((Op->isImm() && Op->getImm() == 0) && + "Unexpected target call instruction operand list!"); + + // Skip the zero. 
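+    // Everything printed before the zero names return-value symbols; the
+    // remaining operands are the call arguments, emitted below as a second
+    // parenthesized list.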
+ ++I; + + OS << ") ("; + + unsigned N = MI->getNumOperands(); + while (I < N) { + printOperand(MI, I++, OS); + + if (I < N) + OS << ", "; + } + + OS << ");"; + } + + printAnnotation(OS, Annot); +} + +void HSAILInstPrinter::printUImmediate(uint64_t Imm, raw_ostream &O) { + O << format("%" PRIu64, Imm); +} + +void HSAILInstPrinter::printSImmediate(int64_t Imm, raw_ostream &O) { + O << format("%" PRId64, Imm); +} + +void HSAILInstPrinter::printU32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << format("%" PRIu32, Op.getImm()); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printU64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << format("%" PRIu64, Op.getImm()); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printS32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << format("%" PRId32, Op.getImm()); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printS64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << format("%" PRId64, Op.getImm()); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printF16(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printF32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isFPImm()) { + O << format("0F%08" PRIx32, FloatToBits(static_cast(Op.getFPImm()))); + return; + } + + assert(!Op.isImm()); + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printF64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isFPImm()) { + O << format("0D%016" PRIx64, DoubleToBits(Op.getFPImm())); + return; + } + + assert(!Op.isImm()); + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printB1(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + O << (Op.getImm() ? 
'1' : '0'); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printB32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printU32(MI, OpNo, O); +} + +void HSAILInstPrinter::printB64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + printU64(MI, OpNo, O); +} + +void HSAILInstPrinter::printU8X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + union Bytes { + uint32_t I; + uint8_t Byte[4]; + } U; + + U.I = Op.getImm(); + O << format("u8x4(%" PRIu8 ",%" PRIu8 ",%" PRIu8 ",%" PRIu8 ")", U.Byte[3], + U.Byte[2], U.Byte[1], U.Byte[0]); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printU8X8(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printU16X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + union Bytes { + uint32_t I; + uint16_t U16[2]; + } U; + + U.I = Op.getImm(); + O << format("u16x2(%" PRIu16 ",%" PRIu16 ")", U.U16[1], U.U16[0]); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printU16X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printU32X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isImm()) { + union Bytes { + uint64_t I; + uint32_t U32[2]; + } U; + + U.I = Op.getImm(); + O << format("u32x2(%" PRIu32 ",%" PRIu32 ")", U.U32[1], U.U32[0]); + return; + } + + printOperand(MI, OpNo, O); +} + +void HSAILInstPrinter::printS8X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printS8X8(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printS16X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printS16X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printS32X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printF16X2(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printF16X4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + llvm_unreachable("Implement me"); +} + +void HSAILInstPrinter::printAddrMode3Op(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &BaseOp = MI->getOperand(OpNo); + const MCOperand &RegOp = MI->getOperand(OpNo + 1); + const MCOperand &OffsetOp = MI->getOperand(OpNo + 2); + + assert(RegOp.isReg() && OffsetOp.isImm()); + + unsigned AddrReg = RegOp.getReg(); + int64_t Offset = OffsetOp.getImm(); + + if (BaseOp.isReg()) { + // FIXME: Why is this allowed to be a register? + assert(BaseOp.getReg() == HSAIL::NoRegister); + } else if (BaseOp.isExpr()) { + O << '['; + BaseOp.getExpr()->print(O, &MAI); + O << ']'; + } else if (BaseOp.isImm()) + O << BaseOp.getImm(); + else + llvm_unreachable("Unexpected type for base address operand"); + + // Have both register and immediate offset. + if (AddrReg != HSAIL::NoRegister && Offset != 0) { + O << '[' << getRegisterName(AddrReg); + + // If the offset is negative, it will be printed with the appropriate - + // already. 
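+    // so an explicit '+' separator is only needed for positive offsets.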
+ if (Offset > 0) + O << '+'; + + O << formatDec(Offset) << ']'; + return; + } + + // Only register offset. + if (AddrReg != HSAIL::NoRegister) { + O << '[' << getRegisterName(AddrReg) << ']'; + return; + } + + // Only have immediate offset. + if (Offset != 0) + O << '[' << formatDec(Offset) << ']'; +} + +void HSAILInstPrinter::printVec2Op(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printOperand(MI, OpNo + 0, O); + O << ", "; + printOperand(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printVec3Op(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printOperand(MI, OpNo + 0, O); + O << ", "; + printOperand(MI, OpNo + 1, O); + O << ", "; + printOperand(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printVec4Op(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printOperand(MI, OpNo + 0, O); + O << ", "; + printOperand(MI, OpNo + 1, O); + O << ", "; + printOperand(MI, OpNo + 2, O); + O << ", "; + printOperand(MI, OpNo + 3, O); + O << ')'; +} + +void HSAILInstPrinter::printV2U32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU32(MI, OpNo + 0, O); + O << ", "; + printU32(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printV2F64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF64(MI, OpNo + 0, O); + O << ", "; + printF64(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printV2U64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU64(MI, OpNo + 0, O); + O << ", "; + printU64(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printV2F32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF32(MI, OpNo + 0, O); + O << ", "; + printF32(MI, OpNo + 1, O); + O << ')'; +} + +void HSAILInstPrinter::printV3U32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU32(MI, OpNo + 0, O); + O << ", "; + printU32(MI, OpNo + 1, O); + O << ", "; + printU32(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printV3F32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF32(MI, OpNo + 0, O); + O << ", "; + printF32(MI, OpNo + 1, O); + O << ", "; + printF32(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printV3U64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU64(MI, OpNo + 0, O); + O << ", "; + printU64(MI, OpNo + 1, O); + O << ", "; + printU64(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printV3F64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF64(MI, OpNo + 0, O); + O << ", "; + printF64(MI, OpNo + 1, O); + O << ", "; + printF64(MI, OpNo + 2, O); + O << ')'; +} + +void HSAILInstPrinter::printV4U32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU32(MI, OpNo + 0, O); + O << ", "; + printU32(MI, OpNo + 1, O); + O << ", "; + printU32(MI, OpNo + 2, O); + O << ", "; + printU32(MI, OpNo + 3, O); + O << ')'; +} + +void HSAILInstPrinter::printV4F32(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF32(MI, OpNo + 0, O); + O << ", "; + printF32(MI, OpNo + 1, O); + O << ", "; + printF32(MI, OpNo + 2, O); + O << ", "; + printF32(MI, OpNo + 3, O); + O << ')'; +} + +void HSAILInstPrinter::printV4U64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printU64(MI, OpNo + 0, O); + O << ", "; + printU64(MI, OpNo + 1, O); + O << ", "; + printU64(MI, OpNo + 2, O); + O << ", "; + printU64(MI, OpNo + 3, O); + O << ')'; +} + +void 
HSAILInstPrinter::printV4F64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + O << '('; + printF64(MI, OpNo + 0, O); + O << ", "; + printF64(MI, OpNo + 1, O); + O << ", "; + printF64(MI, OpNo + 2, O); + O << ", "; + printF64(MI, OpNo + 3, O); + O << ')'; +} + +void HSAILInstPrinter::printFTZ(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << "_ftz"; +} + +void HSAILInstPrinter::printNoNull(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << "_nonull"; +} + +void HSAILInstPrinter::printV4(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << "_v4"; +} + +void HSAILInstPrinter::printBrigAlignment(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Align = MI->getOperand(OpNo).getImm(); + if (Align != 1) + O << "_align(" << formatDec(Align) << ')'; +} + +static bool isNaturalAlignment(BrigType BT, unsigned Align) { + switch (Align) { + case 4: { + switch (BT) { + case BRIG_TYPE_U32: + case BRIG_TYPE_F32: + case BRIG_TYPE_B32: + case BRIG_TYPE_S32: + case BRIG_TYPE_U8X4: + case BRIG_TYPE_S8X4: + case BRIG_TYPE_F16X2: + case BRIG_TYPE_S16X2: + case BRIG_TYPE_U16X2: + return true; + default: + return false; + } + } + case 8: { + switch (BT) { + case BRIG_TYPE_U64: + case BRIG_TYPE_B64: + case BRIG_TYPE_F64: + case BRIG_TYPE_S64: + case BRIG_TYPE_F16X4: + case BRIG_TYPE_F32X2: + case BRIG_TYPE_ROIMG: + case BRIG_TYPE_RWIMG: + case BRIG_TYPE_S16X4: + case BRIG_TYPE_S32X2: + case BRIG_TYPE_S8X8: + case BRIG_TYPE_SAMP: + case BRIG_TYPE_SIG32: + case BRIG_TYPE_SIG64: + case BRIG_TYPE_U16X4: + case BRIG_TYPE_U32X2: + case BRIG_TYPE_U8X8: + case BRIG_TYPE_WOIMG: + return true; + default: + return false; + } + } + case 1: { + switch (BT) { + case BRIG_TYPE_B1: + case BRIG_TYPE_B8: + case BRIG_TYPE_S8: + case BRIG_TYPE_U8: + return true; + default: + return false; + } + } + case 2: { + switch (BT) { + case BRIG_TYPE_U16: + case BRIG_TYPE_B16: + case BRIG_TYPE_S16: + case BRIG_TYPE_F16: + return true; + default: + return false; + } + } + case 16: { + switch (BT) { + case BRIG_TYPE_B128: + case BRIG_TYPE_F16X8: + case BRIG_TYPE_F32X4: + case BRIG_TYPE_F64X2: + case BRIG_TYPE_S16X8: + case BRIG_TYPE_S32X4: + case BRIG_TYPE_S64X2: + case BRIG_TYPE_S8X16: + case BRIG_TYPE_U16X8: + case BRIG_TYPE_U32X4: + case BRIG_TYPE_U64X2: + case BRIG_TYPE_U8X16: + return true; + default: + return false; + } + } + default: + return false; + } +} + +void HSAILInstPrinter::printArgDeclAlignment(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Align = MI->getOperand(OpNo).getImm(); + + int TypeLengthIdx = + HSAIL::getNamedOperandIdx(MI->getOpcode(), HSAIL::OpName::TypeLength); + BrigType BT = static_cast(MI->getOperand(TypeLengthIdx).getImm()); + + // Don't print align declaration if it uses the alignment implied in this + // context. This isn't necessary, but it matches what libHSAIL's disassembler + // produces. 
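+  // In other words, an explicit align(N) is emitted only when N differs from
+  // the natural alignment of the declared BRIG type.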
+ if (!isNaturalAlignment(BT, Align)) + O << "align(" << formatDec(Align) << ") "; +} + +void HSAILInstPrinter::printArraySize(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int64_t Size = MI->getOperand(OpNo).getImm(); + if (Size != 0) + O << '[' << Size << ']'; +} + +void HSAILInstPrinter::printEquiv(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Equiv = MI->getOperand(OpNo).getImm(); + if (Equiv != 0) + O << "_equiv(" << formatDec(Equiv) << ')'; +} + +void HSAILInstPrinter::printBrigAllocation(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ALLOCATION_NONE: + O << "NONE"; + break; + case BRIG_ALLOCATION_PROGRAM: + O << "PROGRAM"; + break; + case BRIG_ALLOCATION_AGENT: + O << "AGENT"; + break; + case BRIG_ALLOCATION_AUTOMATIC: + O << "AUTOMATIC"; + break; + } +} + +void HSAILInstPrinter::printBrigAluModifierMask(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ALU_FTZ: + O << "FTZ"; + break; + } +} + +void HSAILInstPrinter::printBrigAtomicOperation(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ATOMIC_ADD: + O << "_add"; + break; + case BRIG_ATOMIC_AND: + O << "_and"; + break; + case BRIG_ATOMIC_CAS: + O << "_cas"; + break; + case BRIG_ATOMIC_EXCH: + O << "_exch"; + break; + case BRIG_ATOMIC_LD: + O << "_ld"; + break; + case BRIG_ATOMIC_MAX: + O << "_max"; + break; + case BRIG_ATOMIC_MIN: + O << "_min"; + break; + case BRIG_ATOMIC_OR: + O << "_or"; + break; + case BRIG_ATOMIC_ST: + O << "_st"; + break; + case BRIG_ATOMIC_SUB: + O << "_sub"; + break; + case BRIG_ATOMIC_WRAPDEC: + O << "_wrapdec"; + break; + case BRIG_ATOMIC_WRAPINC: + O << "_wrapinc"; + break; + case BRIG_ATOMIC_XOR: + O << "_xor"; + break; + case BRIG_ATOMIC_WAIT_EQ: + O << "_wait_eq"; + break; + case BRIG_ATOMIC_WAIT_NE: + O << "_wait_ne"; + break; + case BRIG_ATOMIC_WAIT_LT: + O << "_wait_lt"; + break; + case BRIG_ATOMIC_WAIT_GTE: + O << "_wait_gte"; + break; + case BRIG_ATOMIC_WAITTIMEOUT_EQ: + O << "_waittimeout_eq"; + break; + case BRIG_ATOMIC_WAITTIMEOUT_NE: + O << "_waittimeout_ne"; + break; + case BRIG_ATOMIC_WAITTIMEOUT_LT: + O << "_waittimeout_lt"; + break; + case BRIG_ATOMIC_WAITTIMEOUT_GTE: + O << "_waittimeout_gte"; + break; + } +} + +void HSAILInstPrinter::printBrigCompareOperation(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_COMPARE_EQ: + O << "_eq"; + break; + case BRIG_COMPARE_NE: + O << "_ne"; + break; + case BRIG_COMPARE_LT: + O << "_lt"; + break; + case BRIG_COMPARE_LE: + O << "_le"; + break; + case BRIG_COMPARE_GT: + O << "_gt"; + break; + case BRIG_COMPARE_GE: + O << "_ge"; + break; + case BRIG_COMPARE_EQU: + O << "_equ"; + break; + case BRIG_COMPARE_NEU: + O << "_neu"; + break; + case BRIG_COMPARE_LTU: + O << "_ltu"; + break; + case BRIG_COMPARE_LEU: + O << "_leu"; + break; + case BRIG_COMPARE_GTU: + O << "_gtu"; + break; + case BRIG_COMPARE_GEU: + O << "_geu"; + break; + case BRIG_COMPARE_NUM: + O << "_num"; + break; + case BRIG_COMPARE_NAN: + O << "_nan"; + break; + case BRIG_COMPARE_SEQ: + O << "_seq"; + break; + case BRIG_COMPARE_SNE: + O << "_sne"; + break; + case BRIG_COMPARE_SLT: + O << "_slt"; + break; + case BRIG_COMPARE_SLE: + O << "_sle"; + break; + case BRIG_COMPARE_SGT: + O << "_sgt"; + break; + case BRIG_COMPARE_SGE: + O << "_sge"; + break; + case BRIG_COMPARE_SGEU: + O << "_sgeu"; + break; + case BRIG_COMPARE_SEQU: + O 
<< "_sequ"; + break; + case BRIG_COMPARE_SNEU: + O << "_sneu"; + break; + case BRIG_COMPARE_SLTU: + O << "_sltu"; + break; + case BRIG_COMPARE_SLEU: + O << "_sleu"; + break; + case BRIG_COMPARE_SNUM: + O << "_snum"; + break; + case BRIG_COMPARE_SNAN: + O << "_snan"; + break; + case BRIG_COMPARE_SGTU: + O << "_sgtu"; + break; + } +} + +void HSAILInstPrinter::printBrigControlDirective(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_CONTROL_ENABLEBREAKEXCEPTIONS: + O << "enablebreakexceptions"; + break; + case BRIG_CONTROL_ENABLEDETECTEXCEPTIONS: + O << "enabledetectexceptions"; + break; + case BRIG_CONTROL_MAXDYNAMICGROUPSIZE: + O << "maxdynamicgroupsize"; + break; + case BRIG_CONTROL_MAXFLATGRIDSIZE: + O << "maxflatgridsize"; + break; + case BRIG_CONTROL_MAXFLATWORKGROUPSIZE: + O << "maxflatworkgroupsize"; + break; + case BRIG_CONTROL_REQUIREDDIM: + O << "requireddim"; + break; + case BRIG_CONTROL_REQUIREDGRIDSIZE: + O << "requiredgridsize"; + break; + case BRIG_CONTROL_REQUIREDWORKGROUPSIZE: + O << "requiredworkgroupsize"; + break; + case BRIG_CONTROL_REQUIRENOPARTIALWORKGROUPS: + O << "requirenopartialworkgroups"; + break; + } +} + +void HSAILInstPrinter::printBrigExecutableModifierMask(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_EXECUTABLE_DEFINITION: + O << "DEFINITION"; + break; + } +} + +void HSAILInstPrinter::printBrigImageChannelOrder(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_CHANNEL_ORDER_A: + O << "a"; + break; + case BRIG_CHANNEL_ORDER_R: + O << "r"; + break; + case BRIG_CHANNEL_ORDER_RX: + O << "rx"; + break; + case BRIG_CHANNEL_ORDER_RG: + O << "rg"; + break; + case BRIG_CHANNEL_ORDER_RGX: + O << "rgx"; + break; + case BRIG_CHANNEL_ORDER_RA: + O << "ra"; + break; + case BRIG_CHANNEL_ORDER_RGB: + O << "rgb"; + break; + case BRIG_CHANNEL_ORDER_RGBX: + O << "rgbx"; + break; + case BRIG_CHANNEL_ORDER_RGBA: + O << "rgba"; + break; + case BRIG_CHANNEL_ORDER_BGRA: + O << "bgra"; + break; + case BRIG_CHANNEL_ORDER_ARGB: + O << "argb"; + break; + case BRIG_CHANNEL_ORDER_ABGR: + O << "abgr"; + break; + case BRIG_CHANNEL_ORDER_SRGB: + O << "srgb"; + break; + case BRIG_CHANNEL_ORDER_SRGBX: + O << "srgbx"; + break; + case BRIG_CHANNEL_ORDER_SRGBA: + O << "srgba"; + break; + case BRIG_CHANNEL_ORDER_SBGRA: + O << "sbgra"; + break; + case BRIG_CHANNEL_ORDER_INTENSITY: + O << "intensity"; + break; + case BRIG_CHANNEL_ORDER_LUMINANCE: + O << "luminance"; + break; + case BRIG_CHANNEL_ORDER_DEPTH: + O << "depth"; + break; + case BRIG_CHANNEL_ORDER_DEPTH_STENCIL: + O << "depth_stencil"; + break; + } +} + +void HSAILInstPrinter::printBrigImageChannelType(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_CHANNEL_TYPE_SNORM_INT8: + O << "snorm_int8"; + break; + case BRIG_CHANNEL_TYPE_SNORM_INT16: + O << "snorm_int16"; + break; + case BRIG_CHANNEL_TYPE_UNORM_INT8: + O << "unorm_int8"; + break; + case BRIG_CHANNEL_TYPE_UNORM_INT16: + O << "unorm_int16"; + break; + case BRIG_CHANNEL_TYPE_UNORM_INT24: + O << "unorm_int24"; + break; + case BRIG_CHANNEL_TYPE_UNORM_SHORT_555: + O << "unorm_short_555"; + break; + case BRIG_CHANNEL_TYPE_UNORM_SHORT_565: + O << "unorm_short_565"; + break; + case BRIG_CHANNEL_TYPE_UNORM_INT_101010: + O << "unorm_int_101010"; + break; + case BRIG_CHANNEL_TYPE_SIGNED_INT8: + O << "signed_int8"; + break; + case 
BRIG_CHANNEL_TYPE_SIGNED_INT16: + O << "signed_int16"; + break; + case BRIG_CHANNEL_TYPE_SIGNED_INT32: + O << "signed_int32"; + break; + case BRIG_CHANNEL_TYPE_UNSIGNED_INT8: + O << "unsigned_int8"; + break; + case BRIG_CHANNEL_TYPE_UNSIGNED_INT16: + O << "unsigned_int16"; + break; + case BRIG_CHANNEL_TYPE_UNSIGNED_INT32: + O << "unsigned_int32"; + break; + case BRIG_CHANNEL_TYPE_HALF_FLOAT: + O << "half_float"; + break; + case BRIG_CHANNEL_TYPE_FLOAT: + O << "float"; + break; + } +} + +void HSAILInstPrinter::printBrigImageGeometry(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_GEOMETRY_1D: + O << "_1d"; + break; + case BRIG_GEOMETRY_2D: + O << "_2d"; + break; + case BRIG_GEOMETRY_3D: + O << "_3d"; + break; + case BRIG_GEOMETRY_1DA: + O << "_1da"; + break; + case BRIG_GEOMETRY_2DA: + O << "_2da"; + break; + case BRIG_GEOMETRY_1DB: + O << "_1db"; + break; + case BRIG_GEOMETRY_2DDEPTH: + O << "_2ddepth"; + break; + case BRIG_GEOMETRY_2DADEPTH: + O << "_2dadepth"; + break; + case BRIG_GEOMETRY_UNKNOWN: + O << "_unknown"; + break; + } +} + +void HSAILInstPrinter::printBrigImageQuery(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_IMAGE_QUERY_WIDTH: + O << "width"; + break; + case BRIG_IMAGE_QUERY_HEIGHT: + O << "height"; + break; + case BRIG_IMAGE_QUERY_DEPTH: + O << "depth"; + break; + case BRIG_IMAGE_QUERY_ARRAY: + O << "array"; + break; + case BRIG_IMAGE_QUERY_CHANNELORDER: + O << "channelorder"; + break; + case BRIG_IMAGE_QUERY_CHANNELTYPE: + O << "channeltype"; + break; + } +} + +void HSAILInstPrinter::printBrigLinkage(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_LINKAGE_NONE: + O << "NONE"; + break; + case BRIG_LINKAGE_PROGRAM: + O << "PROGRAM"; + break; + case BRIG_LINKAGE_MODULE: + O << "MODULE"; + break; + case BRIG_LINKAGE_FUNCTION: + O << "FUNCTION"; + break; + case BRIG_LINKAGE_ARG: + O << "ARG"; + break; + } +} + +void HSAILInstPrinter::printBrigMachineModel(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_MACHINE_SMALL: + O << "$small"; + break; + case BRIG_MACHINE_LARGE: + O << "$large"; + break; + } +} + +void HSAILInstPrinter::printBrigMemoryModifierMask(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_MEMORY_CONST: + O << "CONST"; + break; + } +} + +void HSAILInstPrinter::printBrigMemoryOrder(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_MEMORY_ORDER_NONE: + O << "_"; + break; + case BRIG_MEMORY_ORDER_RELAXED: + O << "_rlx"; + break; + case BRIG_MEMORY_ORDER_SC_ACQUIRE: + O << "_scacq"; + break; + case BRIG_MEMORY_ORDER_SC_RELEASE: + O << "_screl"; + break; + case BRIG_MEMORY_ORDER_SC_ACQUIRE_RELEASE: + O << "_scar"; + break; + } +} + +void HSAILInstPrinter::printBrigMemoryScope(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_MEMORY_SCOPE_WAVEFRONT: + O << "_wave"; + break; + case BRIG_MEMORY_SCOPE_WORKGROUP: + O << "_wg"; + break; + case BRIG_MEMORY_SCOPE_AGENT: + O << "_agent"; + break; + case BRIG_MEMORY_SCOPE_SYSTEM: + O << "_system"; + break; + } +} + +void HSAILInstPrinter::printBrigPack(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_PACK_NONE: + O << "_"; + break; + case BRIG_PACK_PP: + O << 
"_pp"; + break; + case BRIG_PACK_PS: + O << "_ps"; + break; + case BRIG_PACK_SP: + O << "_sp"; + break; + case BRIG_PACK_SS: + O << "_ss"; + break; + case BRIG_PACK_S: + O << "_s"; + break; + case BRIG_PACK_P: + O << "_p"; + break; + case BRIG_PACK_PPSAT: + O << "_pp_sat"; + break; + case BRIG_PACK_PSSAT: + O << "_ps_sat"; + break; + case BRIG_PACK_SPSAT: + O << "_sp_sat"; + break; + case BRIG_PACK_SSSAT: + O << "_ss_sat"; + break; + case BRIG_PACK_SSAT: + O << "_s_sat"; + break; + case BRIG_PACK_PSAT: + O << "_p_sat"; + break; + } +} + +void HSAILInstPrinter::printBrigProfile(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_PROFILE_BASE: + O << "$base"; + break; + case BRIG_PROFILE_FULL: + O << "$full"; + break; + } +} + +void HSAILInstPrinter::printBrigRound(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ROUND_NONE: + break; + case BRIG_ROUND_FLOAT_DEFAULT: + break; + case BRIG_ROUND_FLOAT_NEAR_EVEN: + O << "_near"; + break; + case BRIG_ROUND_FLOAT_ZERO: + O << "_zero"; + break; + case BRIG_ROUND_FLOAT_PLUS_INFINITY: + O << "_up"; + break; + case BRIG_ROUND_FLOAT_MINUS_INFINITY: + O << "_down"; + break; + case BRIG_ROUND_INTEGER_NEAR_EVEN: + O << "_neari"; + break; + case BRIG_ROUND_INTEGER_ZERO: + // This is the default for integer ops, omit it. + // O << "_zeroi"; + break; + case BRIG_ROUND_INTEGER_PLUS_INFINITY: + O << "_upi"; + break; + case BRIG_ROUND_INTEGER_MINUS_INFINITY: + O << "_downi"; + break; + case BRIG_ROUND_INTEGER_NEAR_EVEN_SAT: + O << "_neari_sat"; + break; + case BRIG_ROUND_INTEGER_ZERO_SAT: + O << "_zeroi_sat"; + break; + case BRIG_ROUND_INTEGER_PLUS_INFINITY_SAT: + O << "_upi_sat"; + break; + case BRIG_ROUND_INTEGER_MINUS_INFINITY_SAT: + O << "_downi_sat"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN: + O << "_sneari"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_ZERO: + O << "_szeroi"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY: + O << "_supi"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY: + O << "_sdowni"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_NEAR_EVEN_SAT: + O << "_sneari_sat"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_ZERO_SAT: + O << "_szeroi_sat"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_PLUS_INFINITY_SAT: + O << "_supi_sat"; + break; + case BRIG_ROUND_INTEGER_SIGNALING_MINUS_INFINITY_SAT: + O << "_sdowni_sat"; + break; + } +} + +void HSAILInstPrinter::printBrigSamplerAddressing(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_ADDRESSING_UNDEFINED: + O << "UNDEFINED"; + break; + case BRIG_ADDRESSING_CLAMP_TO_EDGE: + O << "CLAMP_TO_EDGE"; + break; + case BRIG_ADDRESSING_CLAMP_TO_BORDER: + O << "CLAMP_TO_BORDER"; + break; + case BRIG_ADDRESSING_REPEAT: + O << "REPEAT"; + break; + case BRIG_ADDRESSING_MIRRORED_REPEAT: + O << "MIRRORED_REPEAT"; + break; + } +} + +void HSAILInstPrinter::printBrigSamplerCoordNormalization(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_COORD_UNNORMALIZED: + O << "unnormalized"; + break; + case BRIG_COORD_NORMALIZED: + O << "normalized"; + break; + } +} + +void HSAILInstPrinter::printBrigSamplerFilter(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_FILTER_NEAREST: + O << "nearest"; + break; + case BRIG_FILTER_LINEAR: + O << "linear"; + break; + } +} + +void 
HSAILInstPrinter::printBrigSamplerQuery(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_SAMPLER_QUERY_ADDRESSING: + O << "addressing"; + break; + case BRIG_SAMPLER_QUERY_COORD: + O << "coord"; + break; + case BRIG_SAMPLER_QUERY_FILTER: + O << "filter"; + break; + } +} + +void HSAILInstPrinter::printBrigSegCvtModifierMask(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_SEG_CVT_NONULL: + O << "_nonull"; + break; + } +} + +void HSAILInstPrinter::printBrigSegment(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case HSAILAS::FLAT_ADDRESS: + // Assumed default. + break; + case HSAILAS::GLOBAL_ADDRESS: + O << "_global"; + break; + case HSAILAS::READONLY_ADDRESS: + O << "_readonly"; + break; + case HSAILAS::KERNARG_ADDRESS: + O << "_kernarg"; + break; + case HSAILAS::GROUP_ADDRESS: + O << "_group"; + break; + case HSAILAS::PRIVATE_ADDRESS: + O << "_private"; + break; + case HSAILAS::SPILL_ADDRESS: + O << "_spill"; + break; + case HSAILAS::ARG_ADDRESS: + O << "_arg"; + break; + case HSAILAS::REGION_ADDRESS: { + // For now, the only non-flat implied segment appears to be region. + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + if (Desc.TSFlags & HSAILInstrFlags::HasDefaultSegment) + break; + + O << "_region"; + break; + } + default: + llvm_unreachable("bad segment value"); + } + +#if 0 + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_SEGMENT_GLOBAL: + O << "_global"; + break; + case BRIG_SEGMENT_READONLY: + O << "_readonly"; + break; + case BRIG_SEGMENT_KERNARG: + O << "_kernarg"; + break; + case BRIG_SEGMENT_GROUP: + O << "_group"; + break; + case BRIG_SEGMENT_PRIVATE: + O << "_private"; + break; + case BRIG_SEGMENT_SPILL: + O << "_spill"; + break; + case BRIG_SEGMENT_ARG: + O << "_arg"; + break; + case BRIG_SEGMENT_AMD_GCN: + O << "_region"; + break; + default: + llvm_unreachable("bad segment value"); + } +#endif +} + +void HSAILInstPrinter::printBrigType(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_TYPE_U8: + O << "_u8"; + break; + case BRIG_TYPE_U16: + O << "_u16"; + break; + case BRIG_TYPE_U32: + O << "_u32"; + break; + case BRIG_TYPE_U64: + O << "_u64"; + break; + case BRIG_TYPE_S8: + O << "_s8"; + break; + case BRIG_TYPE_S16: + O << "_s16"; + break; + case BRIG_TYPE_S32: + O << "_s32"; + break; + case BRIG_TYPE_S64: + O << "_s64"; + break; + case BRIG_TYPE_F16: + O << "_f16"; + break; + case BRIG_TYPE_F32: + O << "_f32"; + break; + case BRIG_TYPE_F64: + O << "_f64"; + break; + case BRIG_TYPE_B1: + O << "_b1"; + break; + case BRIG_TYPE_B8: + O << "_b8"; + break; + case BRIG_TYPE_B16: + O << "_b16"; + break; + case BRIG_TYPE_B32: + O << "_b32"; + break; + case BRIG_TYPE_B64: + O << "_b64"; + break; + case BRIG_TYPE_B128: + O << "_b128"; + break; + case BRIG_TYPE_SAMP: + O << "_samp"; + break; + case BRIG_TYPE_ROIMG: + O << "_roimg"; + break; + case BRIG_TYPE_WOIMG: + O << "_woimg"; + break; + case BRIG_TYPE_RWIMG: + O << "_rwimg"; + break; + case BRIG_TYPE_SIG32: + O << "_sig32"; + break; + case BRIG_TYPE_SIG64: + O << "_sig64"; + break; + case BRIG_TYPE_U8X4: + O << "_u8x4"; + break; + case BRIG_TYPE_U8X8: + O << "_u8x8"; + break; + case BRIG_TYPE_U8X16: + O << "_u8x16"; + break; + case BRIG_TYPE_U16X2: + O << "_u16x2"; + break; + case BRIG_TYPE_U16X4: + O << "_u16x4"; + break; + case BRIG_TYPE_U16X8: + O << "_u16x8"; + break; + case BRIG_TYPE_U32X2: 
+ O << "_u32x2"; + break; + case BRIG_TYPE_U32X4: + O << "_u32x4"; + break; + case BRIG_TYPE_U64X2: + O << "_u64x2"; + break; + case BRIG_TYPE_S8X4: + O << "_s8x4"; + break; + case BRIG_TYPE_S8X8: + O << "_s8x8"; + break; + case BRIG_TYPE_S8X16: + O << "_s8x16"; + break; + case BRIG_TYPE_S16X2: + O << "_s16x2"; + break; + case BRIG_TYPE_S16X4: + O << "_s16x4"; + break; + case BRIG_TYPE_S16X8: + O << "_s16x8"; + break; + case BRIG_TYPE_S32X2: + O << "_s32x2"; + break; + case BRIG_TYPE_S32X4: + O << "_s32x4"; + break; + case BRIG_TYPE_S64X2: + O << "_s64x2"; + break; + case BRIG_TYPE_F16X2: + O << "_f16x2"; + break; + case BRIG_TYPE_F16X4: + O << "_f16x4"; + break; + case BRIG_TYPE_F16X8: + O << "_f16x8"; + break; + case BRIG_TYPE_F32X2: + O << "_f32x2"; + break; + case BRIG_TYPE_F32X4: + O << "_f32x4"; + break; + case BRIG_TYPE_F64X2: + O << "_f64x2"; + break; + } +} + +void HSAILInstPrinter::printBrigVariableModifierMask(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + switch (MI->getOperand(OpNo).getImm()) { + case BRIG_VARIABLE_DEFINITION: + O << "DEFINITION"; + break; + case BRIG_VARIABLE_CONST: + O << "CONST"; + break; + } +} + +static void printBrigWidthImpl(raw_ostream &O, unsigned Width) { + switch (Width) { + case BRIG_WIDTH_NONE: + O << "_width(NONE)"; + break; + case BRIG_WIDTH_1: + O << "_width(1)"; + break; + case BRIG_WIDTH_2: + O << "_width(2)"; + break; + case BRIG_WIDTH_4: + O << "_width(4)"; + break; + case BRIG_WIDTH_8: + O << "_width(8)"; + break; + case BRIG_WIDTH_16: + O << "_width(16)"; + break; + case BRIG_WIDTH_32: + O << "_width(32)"; + break; + case BRIG_WIDTH_64: + O << "_width(64)"; + break; + case BRIG_WIDTH_128: + O << "_width(128)"; + break; + case BRIG_WIDTH_256: + O << "_width(256)"; + break; + case BRIG_WIDTH_512: + O << "_width(512)"; + break; + case BRIG_WIDTH_1024: + O << "_width(1024)"; + break; + case BRIG_WIDTH_2048: + O << "_width(2048)"; + break; + case BRIG_WIDTH_4096: + O << "_width(4096)"; + break; + case BRIG_WIDTH_8192: + O << "_width(8192)"; + break; + case BRIG_WIDTH_16384: + O << "_width(16384)"; + break; + case BRIG_WIDTH_32768: + O << "_width(32768)"; + break; + case BRIG_WIDTH_65536: + O << "_width(65536)"; + break; + case BRIG_WIDTH_131072: + O << "_width(131072)"; + break; + case BRIG_WIDTH_262144: + O << "_width(262144)"; + break; + case BRIG_WIDTH_524288: + O << "_width(524288)"; + break; + case BRIG_WIDTH_1048576: + O << "_width(1048576)"; + break; + case BRIG_WIDTH_2097152: + O << "_width(2097152)"; + break; + case BRIG_WIDTH_4194304: + O << "_width(4194304)"; + break; + case BRIG_WIDTH_8388608: + O << "_width(8388608)"; + break; + case BRIG_WIDTH_16777216: + O << "_width(16777216)"; + break; + case BRIG_WIDTH_33554432: + O << "_width(33554432)"; + break; + case BRIG_WIDTH_67108864: + O << "_width(67108864)"; + break; + case BRIG_WIDTH_134217728: + O << "_width(134217728)"; + break; + case BRIG_WIDTH_268435456: + O << "_width(268435456)"; + break; + case BRIG_WIDTH_536870912: + O << "_width(536870912)"; + break; + case BRIG_WIDTH_1073741824: + O << "_width(1073741824)"; + break; + case BRIG_WIDTH_2147483648: + O << "_width(2147483648)"; + break; + case BRIG_WIDTH_WAVESIZE: + O << "_width(WAVESIZE)"; + break; + case BRIG_WIDTH_ALL: + O << "_width(all)"; + break; + } +} + +void HSAILInstPrinter::printBrigWidth(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + unsigned Width = MI->getOperand(OpNo).getImm(); + + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + uint32_t DefaultWidth = (Desc.TSFlags & 
HSAILInstrFlags::WidthAttr) >> + Log2_32(HSAILInstrFlags::WidthAttrLo); + + // Don't print the width modifier if it is the default for the instruction. + switch (DefaultWidth) { + case HSAILWidthAttrFlags::WidthAttrOne: + if (Width != BRIG_WIDTH_1) + printBrigWidthImpl(O, Width); + return; + + case HSAILWidthAttrFlags::WidthAttrAll: + if (Width != BRIG_WIDTH_ALL) + printBrigWidthImpl(O, Width); + return; + + case HSAILWidthAttrFlags::WidthAttrWaveSize: + if (Width != BRIG_WIDTH_WAVESIZE) + printBrigWidthImpl(O, Width); + return; + + case HSAILWidthAttrFlags::WidthAttrNone: + printBrigWidthImpl(O, Width); + return; + } +} + +void HSAILInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + O << getRegisterName(Op.getReg()); + } else if (Op.isExpr()) { + const MCExpr *Exp = Op.getExpr(); + Exp->print(O, &MAI); + } else if (Op.isImm() || Op.isFPImm()) { + llvm_unreachable("Immediate should have been handled by special printer"); + } else { + llvm_unreachable("unknown operand type in printOperand"); + } +} + +#include "HSAILGenAsmWriter.inc" Index: lib/Target/HSAIL/InstPrinter/LLVMBuild.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/HSAIL/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HSAILAsmPrinter +parent = HSAIL +required_libraries = MC Support +add_to_library_groups = HSAIL + Index: lib/Target/HSAIL/LLVMBuild.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/LLVMBuild.txt @@ -0,0 +1,32 @@ +;===- ./lib/Target/HSAIL/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = InstPrinter MCTargetDesc TargetInfo + +[component_0] +type = TargetGroup +name = HSAIL +parent = Target +has_asmprinter = 1 + +[component_1] +type = Library +name = HSAILCodeGen +parent = HSAIL +required_libraries = AsmPrinter CodeGen Core IPO HSAILAsmPrinter HSAILDesc MC SelectionDAG Support Target +add_to_library_groups = HSAIL Index: lib/Target/HSAIL/MCTargetDesc/BRIGDwarfStreamer.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/BRIGDwarfStreamer.h @@ -0,0 +1,43 @@ +//===-- BRIGDwarfStreamer.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef BRIG_DWARF_STREAMER +#define BRIG_DWARF_STREAMER + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { + +class RawVectorOstream; + +class BRIGDwarfStreamer : public MCELFStreamer { +private: + RawVectorOstream *dwarfStream; + +public: + BRIGDwarfStreamer(MCContext &Context, MCAsmBackend &TAB, + RawVectorOstream &RVOS, MCCodeEmitter *Emitter); + + void InitSections(bool NoExecStack) override; + void Finish(); + + RawVectorOstream *getDwarfStream(); + + // support for LLVM-style RTTI operations like dyn_cast + inline static bool classof(const BRIGDwarfStreamer *) { return true; } + inline static bool classof(const MCStreamer *streamer) { return true; } +}; + +MCStreamer *createBRIGDwarfStreamer(MCContext &Context, MCAsmBackend &MAB, + RawVectorOstream &RVOS, MCCodeEmitter *CE, + bool RelaxAll); + +} // namespace llvm + +#endif Index: lib/Target/HSAIL/MCTargetDesc/BRIGDwarfStreamer.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/BRIGDwarfStreamer.cpp @@ -0,0 +1,66 @@ +//===-- BRIGDwarfStreamer.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BRIGDwarfStreamer.h" + +#include "RawVectorOstream.h" + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" + + +using namespace llvm; + +BRIGDwarfStreamer::BRIGDwarfStreamer(MCContext &Context, MCAsmBackend &TAB, + RawVectorOstream &RVOS, + MCCodeEmitter *Emitter) + : MCELFStreamer(Context, TAB, RVOS, Emitter), + dwarfStream(&RVOS) {} + +RawVectorOstream *BRIGDwarfStreamer::getDwarfStream() { + raw_ostream &Strm = getAssembler().getWriter().getStream(); + + // We must ensure MC layer is writing to the same stream. 
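+  // (The RawVectorOstream passed at construction is the stream handed to
+  // MCELFStreamer, so everything the MC layer emits, including the DWARF
+  // sections, is captured in the buffer returned here.)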
+ assert(&Strm == static_cast(dwarfStream) && + "MC layer doesn't write to DWARF stream"); + return dwarfStream; +} + +void BRIGDwarfStreamer::InitSections(bool NoExecStack) { + MCSectionELF *codeSection = getContext().getELFSection( + ".brigcode", ELF::SHT_NOBITS, 0); + MCSectionELF *directivesSection = getContext().getELFSection( + ".brigdirectives", ELF::SHT_NOBITS, 0); + SwitchSection(codeSection); + SwitchSection(directivesSection); + SwitchSection(codeSection); +} + +#if 0 +MCStreamer::MCStreamerKind BRIGDwarfStreamer::getStreamerKind() const { + return MCStreamer::BRIGDwarfStreamer; +} +#endif + +void BRIGDwarfStreamer::Finish() { + MCELFStreamer::Finish(); + // flush all DWARF data captured + dwarfStream->flush(); + // stop writing to another stream, if any provided + dwarfStream->releaseStream(); +} + +MCStreamer *llvm::createBRIGDwarfStreamer(MCContext &Context, MCAsmBackend &MAB, + RawVectorOstream &RVOS, + MCCodeEmitter *CE, bool RelaxAll) { + BRIGDwarfStreamer *S = new BRIGDwarfStreamer(Context, MAB, RVOS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; +} Index: lib/Target/HSAIL/MCTargetDesc/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,16 @@ + +add_llvm_library(LLVMHSAILDesc + BRIGDwarfStreamer.cpp + + HSAILAsmBackend.cpp + HSAILELFObjectWriter.cpp + HSAILMCCodeEmitter.cpp + HSAILMCTargetDesc.cpp + HSAILTargetStreamer.cpp + HSAILMCAsmInfo.cpp + RawVectorOstream.cpp + ) + +# FIXME: How does this work for every other target? None of them need +# to specify this dependency. +target_link_libraries(LLVMHSAILDesc PRIVATE LLVMHSAILAsmPrinter) Index: lib/Target/HSAIL/MCTargetDesc/HSAILAsmBackend.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILAsmBackend.h @@ -0,0 +1,73 @@ +//===-- HSAILAsmBackend.h - HSAIL Assembler Backend -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILASMBACKEND_H +#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILASMBACKEND_H + +#include "HSAILELFObjectWriter.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSectionELF.h" + +using namespace llvm; + +namespace { +class HSAILAsmBackend : public MCAsmBackend { +public: + HSAILAsmBackend(const Target &T); + + unsigned getNumFixupKinds() const override { + assert(!"When do we hit this?"); + return 0; + } + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + + bool mayNeedRelaxation(const MCInst &Inst) const override; + + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override; + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; +}; + +class ELFHSAILAsmBackend : public HSAILAsmBackend { +public: + ELFHSAILAsmBackend(const Target &T) : HSAILAsmBackend(T) {} +}; + +class ELFHSAIL_32AsmBackend : public ELFHSAILAsmBackend { +public: + ELFHSAIL_32AsmBackend(const Target &T) : ELFHSAILAsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createELFObjectWriter( + new HSAILELFObjectWriter(false, ELF::EM_HSAIL, false), OS, + /*IsLittleEndian*/ true); + } +}; + +class ELFHSAIL_64AsmBackend : public ELFHSAILAsmBackend { +public: + ELFHSAIL_64AsmBackend(const Target &T) : ELFHSAILAsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + return createELFObjectWriter( + new HSAILELFObjectWriter(true, ELF::EM_HSAIL_64, false), OS, + /*IsLittleEndian*/ true); + } +}; +} // end anonymous namespace + +#endif Index: lib/Target/HSAIL/MCTargetDesc/HSAILAsmBackend.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILAsmBackend.cpp @@ -0,0 +1,113 @@ +//===-- HSAILAsmBackend.h - HSAIL Assembler Backend -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" +#include "HSAILAsmBackend.h" +using namespace llvm; + +HSAILAsmBackend::HSAILAsmBackend(const Target &T) {} + +/// createObjectWriter - Create a new MCObjectWriter instance for use by the +/// assembler backend to emit the final object file. 
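+/// For HSAIL the ELF-specific subclasses (ELFHSAIL_32AsmBackend and
+/// ELFHSAIL_64AsmBackend) override this, so this base implementation is not
+/// expected to be reached.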
+MCObjectWriter *HSAILAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { + assert(!"When do we hit this?"); + return nullptr; +} + +// pulled from x86asmbackend.cpp, used in ApplyFixup +// +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Invalid fixup kind!"); + case FK_PCRel_1: + case FK_Data_1: + return 0; + case FK_PCRel_2: + case FK_Data_2: + return 1; + case FK_PCRel_4: + // case X86::reloc_riprel_4byte: + // case X86::reloc_riprel_4byte_movq_load: + // case X86::reloc_signed_4byte: + // case X86::reloc_global_offset_table: + case FK_Data_4: + return 2; + case FK_PCRel_8: + case FK_Data_8: + return 3; + } +} + +/// applyFixup - Apply the \arg Value for given \arg Fixup into the provided +/// data fragment, at the offset specified by the fixup and following the +/// fixup kind as appropriate. +void HSAILAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { + // pulled from x86asmbackend.cpp + unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); + + assert(Fixup.getOffset() + Size <= DataSize && "Invalid fixup offset!"); + + // Check that uppper bits are either all zeros or all ones. + // Specifically ignore overflow/underflow as long as the leakage is + // limited to the lower bits. This is to remain compatible with + // other assemblers. + assert(isIntN(Size * 8 + 1, Value) && + "Value does not fit in the Fixup field"); + for (unsigned i = 0; i != Size; ++i) + Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); +} + +/// mayNeedRelaxation - Check whether the given instruction may need +/// relaxation. +/// \arg Inst - The instruction to test. +/// \arg Fixups - The actual fixups this instruction encoded to, for potential +/// use by the target backend. +bool HSAILAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { + return false; +} + +/// fixupNeedsRelaxation - Target specific predicate for whether a given +/// fixup requires the associated instruction to be relaxed. +bool HSAILAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + assert(!"When do we hit this?"); + return false; +} + +/// relaxInstruction - Relax the instruction in the given fragment to the next +/// wider instruction. +void HSAILAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { + assert(!"When do we hit this?"); +} + +/// writeNopData - Write an (optimal) nop sequence of Count bytes to the given +/// output. If the target cannot generate such a sequence, it should return an +/// error. +/// \return - True on success. 
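+/// The HSAIL implementation reports success without writing any padding
+/// bytes.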
+bool HSAILAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { + return true; +} + +MCAsmBackend *llvm::createHSAIL32AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, + StringRef CPU) { + return new ELFHSAIL_32AsmBackend(T); +} + +MCAsmBackend *llvm::createHSAIL64AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, + StringRef CPU) { + return new ELFHSAIL_64AsmBackend(T); +} Index: lib/Target/HSAIL/MCTargetDesc/HSAILELFObjectWriter.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILELFObjectWriter.h @@ -0,0 +1,29 @@ +//===-- HSAILELFObjectWriter.h - HSAIL ELF Object Writer --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILELFOBJECTWRITER_H +#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILELFOBJECTWRITER_H + +#include "llvm/MC/MCELFObjectWriter.h" + +namespace llvm { +class HSAILELFObjectWriter : public MCELFObjectTargetWriter { +public: + HSAILELFObjectWriter(bool IsLargeModel, uint16_t EMachine, + bool HasRelocationAddend); + +protected: + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override { + return 0; // currently return 0, which means no relocation + } +}; +} + +#endif Index: lib/Target/HSAIL/MCTargetDesc/HSAILELFObjectWriter.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILELFObjectWriter.cpp @@ -0,0 +1,15 @@ +//===-- HSAILELFObjectWriter.cpp - HSAIL ELF Object Writer ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAILELFObjectWriter.h" +using namespace llvm; + +HSAILELFObjectWriter::HSAILELFObjectWriter(bool IsLargeModel, uint16_t EMachine, + bool HasRelocationAddend) + : MCELFObjectTargetWriter(IsLargeModel, 0, EMachine, HasRelocationAddend) {} Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCAsmInfo.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- HSAILMCAsmInfo.h - HSAIL asm properties -----------------*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the HSAILMCAsmInfo class. 
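+// The HSAILELFMCAsmInfo declared here customizes the generic MCAsmInfo
+// defaults for HSAIL assembly, e.g. the '&'/'@' symbol prefixes, the "//"
+// comment string and the "sectiondata_*" data directives (see
+// HSAILMCAsmInfo.cpp).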
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILMCASMINFO_H +#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILMCASMINFO_H + +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { +class Triple; + +struct HSAILELFMCAsmInfo : public MCAsmInfo { + explicit HSAILELFMCAsmInfo(const Triple &TT); + MCSection *getNonexecutableStackSection(MCContext &Ctx) const override; + + bool isValidUnquotedName(StringRef Name) const override; +}; +} // namespace llvm + +#endif Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCAsmInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCAsmInfo.cpp @@ -0,0 +1,92 @@ +//===-- HSAILMCAsmInfo.cpp - HSAIL asm properties -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the HSAILMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "HSAILMCAsmInfo.h" +#include "HSAILTargetMachine.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" +using namespace llvm; + +HSAILELFMCAsmInfo::HSAILELFMCAsmInfo(const Triple &TT) { + PrivateGlobalPrefix = "&"; + PrivateLabelPrefix = "@"; + GlobalDirective = "global"; + SupportsQuotedNames = false; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + + // We must set SupportsDebugInformation to true in order for debug info to + // be generated. This shouldn't cause unwanted output, because if the FE + // does not produce debug metadata (no -g option) then there won't be (much) + // debug info generated. + // TODO: we may need to especially ensure that when -g is not passed to the + // FE, + // BRIGAsmPrinter does not create large ".text", etc., sections in + // order to + // save space and I/O time. + // + + // FIXME: Setting SupportsDebugInformation to true causes an assertion + // failure in the AsmPrinter() destructor. + // Assertion `!DD && Handlers.empty() && "Debug/EH info didn't get finalized"' + // failed. + SupportsDebugInformation = false; + + PointerSize = TT.getArch() == Triple::hsail64 ? 8 : 4; + + ExceptionsType = ExceptionHandling::None; + // DwarfRequiresFrameSection = false; + CommentString = "//"; + Data8bitsDirective = "sectiondata_b8\t"; + Data16bitsDirective = "sectiondata_b16\t"; + Data32bitsDirective = "sectiondata_b32\t"; + Data64bitsDirective = "sectiondata_b64\t"; +} + +MCSection * +HSAILELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const { + return nullptr; +} + +static bool isValidChar(char C) { + return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9') || C == '_' || C == '$' || C == '.' || C == '@'; +} + +static bool isValidFirstChar(char C) { + return isValidChar(C) && C != '.' 
&& !(C >= '0' && C <= '9'); +} + +bool HSAILELFMCAsmInfo::isValidUnquotedName(StringRef Name) const { + char First = Name.front(); + assert((First == '%' || First == '&' || First == '@') && + "Missing valid prefix character"); + Name = Name.drop_front(1); + + if (!Name.empty()) { + if (!isValidFirstChar(Name.front())) + return false; + + Name = Name.drop_front(); + } + + for (char C : Name) { + if (!isValidChar(C)) + return false; + } + + return true; +} Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCCodeEmitter.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCCodeEmitter.h @@ -0,0 +1,71 @@ +//=== HSAILMCCodeEmitter.h - convert HSAIL code to machine code -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to convert HSAIL code to machine code. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILMCCODEEMITTER_H +#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILMCCODEEMITTER_H + +#define DEBUG_TYPE "mccodeemitter" +#include "HSAIL.h" +#include "HSAILSubtarget.h" +#include "HSAILInstrInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +class HSAILMCCodeEmitter : public MCCodeEmitter { + HSAILMCCodeEmitter(const HSAILMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const HSAILMCCodeEmitter &); // DO NOT IMPLEMENT + const MCInstrInfo &MCII; + const MCRegisterInfo &MRI; + MCContext &Ctx; + +public: + HSAILMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, + MCContext &ctx) + : MCII(mcii), MRI(mri), Ctx(ctx) {} + + ~HSAILMCCodeEmitter() {} + + void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const; + + void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, + raw_ostream &OS) const; + + void EmitImmediate(const MCOperand &Disp, unsigned ImmSize, + MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl &Fixups, int ImmOffset = 0) const; + + void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld, + unsigned &CurByte, raw_ostream &OS) const; + + void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base, + unsigned &CurByte, raw_ostream &OS) const; + + void EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, + uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl &Fixups) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte, + int MemOperand, const MCInst &MI, + raw_ostream &OS) const; +}; + +} // end anonymous namespace + +#endif Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCCodeEmitter.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCCodeEmitter.cpp @@ -0,0 +1,104 @@ +//===-- HSAIL/HSAILMCCodeEmitter.cpp - Convert HSAIL code to machine code -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the HSAILMCCodeEmitter class. +// +//===----------------------------------------------------------------------===// + +#include "HSAILMCCodeEmitter.h" + +using namespace llvm; + +void HSAILMCCodeEmitter::EmitByte(unsigned char C, unsigned &CurByte, + raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitConstant(uint64_t Val, unsigned Size, + unsigned &CurByte, + raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitRegModRMByte(const MCOperand &ModRMReg, + unsigned RegOpcodeFld, + unsigned &CurByte, + raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitSIBByte(unsigned SS, unsigned Index, unsigned Base, + unsigned &CurByte, raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +namespace { +class DummyMCCodeEmitter : public MCCodeEmitter { +private: + DummyMCCodeEmitter(const DummyMCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const DummyMCCodeEmitter &); // DO NOT IMPLEMENT +protected: // Can only create subclasses. + DummyMCCodeEmitter(); + + const MCInstrInfo &m_ii; + const MCSubtargetInfo &m_sti; + MCContext &m_ctx; + +public: + DummyMCCodeEmitter(const MCInstrInfo &II, const MCSubtargetInfo &STI, + MCContext &Ctx) + : m_ii(II), m_sti(STI), m_ctx(Ctx) {} + DummyMCCodeEmitter(const MCInstrInfo &II, const MCRegisterInfo &MRI, + const MCSubtargetInfo &STI, MCContext &Ctx) + : m_ii(II), m_sti(STI), m_ctx(Ctx) {} + + ~DummyMCCodeEmitter() override {} + + void encodeInstruction(const MCInst &Inst, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override { + llvm_unreachable("DummyMCCodeEmitter::EncodeInstruction called..."); + } +}; +} + +MCCodeEmitter *llvm::createHSAILMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx) { + return new HSAILMCCodeEmitter(MCII, MRI, Ctx); +} + +void HSAILMCCodeEmitter::EmitImmediate(const MCOperand &DispOp, unsigned Size, + MCFixupKind FixupKind, unsigned &CurByte, + raw_ostream &OS, + SmallVectorImpl &Fixups, + int ImmOffset) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitMemModRMByte( + const MCInst &MI, unsigned Op, unsigned RegOpcodeField, uint64_t TSFlags, + unsigned &CurByte, raw_ostream &OS, + SmallVectorImpl &Fixups) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, + unsigned &CurByte, + int MemOperand, + const MCInst &MI, + raw_ostream &OS) const { + llvm_unreachable("When do we hit this?"); +} + +void HSAILMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + llvm_unreachable("When do we hit this?"); +} Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCTargetDesc.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCTargetDesc.h @@ -0,0 +1,60 @@ +//===-- HSAILMCTargetDesc.h - HSAIL Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief Provides HSAIL specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILMCTARGETDESC_H +#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILMCTARGETDESC_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCInstrInfo; +class MCObjectWriter; +class MCRegisterInfo; +class MCSubtargetInfo; +class Target; +class Triple; +class raw_ostream; + +extern Target TheHSAIL_32Target, TheHSAIL_64Target; + +MCCodeEmitter *createHSAILMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); + +MCAsmBackend *createHSAIL32AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, + StringRef CPU); + +MCAsmBackend *createHSAIL64AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, + StringRef CPU); + +} // End llvm namespace + +#define GET_REGINFO_ENUM +#include "HSAILGenRegisterInfo.inc" + +#define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_OPERAND_ENUM +#include "HSAILGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "HSAILGenSubtargetInfo.inc" + +#endif Index: lib/Target/HSAIL/MCTargetDesc/HSAILMCTargetDesc.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILMCTargetDesc.cpp @@ -0,0 +1,126 @@ +//===-- HSAILMCTargetDesc.cpp - HSAIL Target Descriptions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// \brief This file provides HSAIL specific target descriptions. 
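+/// LLVMInitializeHSAILTargetMC() registers the asm info, code-gen info,
+/// instruction/register/subtarget descriptions, instruction printer, code
+/// emitter, asm backends and the BRIG ELF streamer for both the hsail and
+/// hsail64 targets.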
+// +//===----------------------------------------------------------------------===// + +#include "HSAILMCTargetDesc.h" +#include "HSAILMCAsmInfo.h" +#include "HSAILMCCodeEmitter.h" +#include "InstPrinter/HSAILInstPrinter.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/TargetRegistry.h" + +#include "HSAILTargetStreamer.h" + +#include "BRIGDwarfStreamer.h" +#include "RawVectorOstream.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "HSAILGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "HSAILGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "HSAILGenRegisterInfo.inc" + +// MC related code probably should be in MCTargetDesc subdir +static MCCodeGenInfo *createHSAILMCCodeGenInfo(const Triple &TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + X->initMCCodeGenInfo(RM, CM, OL); + return X; +} + +static MCInstrInfo *createHSAILMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitHSAILMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createHSAILMCRegisterInfo(const Triple &TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitHSAILMCRegisterInfo(X, 0); + return X; +} + +static MCSubtargetInfo *createHSAILMCSubtargetInfo(const Triple &TT, StringRef CPU, + StringRef FS) { + return createHSAILMCSubtargetInfoImpl(TT, CPU, FS); +} + +#if 1 +static MCStreamer *createBRIGStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &TAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + // pass 0 instead of &_OS, if you do not want DWARF data to be forwarded to + // the provided stream + // this stream will be deleted in the destructor of BRIGAsmPrinter + RawVectorOstream *RVOS = new RawVectorOstream(&OS); + + return createBRIGDwarfStreamer(Ctx, TAB, *RVOS, Emitter, RelaxAll); +} +#else +static MCStreamer *createBRIGStreamer(MCStreamer &S, + const MCSubtargetInfo &STI) { + // pass 0 instead of &_OS, if you do not want DWARF data to be forwarded to + // the provided stream + // this stream will be deleted in the destructor of BRIGAsmPrinter + RawVectorOstream *RVOS = new RawVectorOstream(&OS); + + return createBRIGDwarfStreamer(Ctx, TAB, *RVOS, Emitter, RelaxAll); +} + +#endif + + +MCTargetStreamer * +createHSAILObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { + return new HSAILTargetStreamer(S); +} + +static MCInstPrinter *createHSAILMCInstPrinter(const Triple &TT, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + const MCInstrInfo &MII, + const MCRegisterInfo &MRI) { + return new HSAILInstPrinter(MAI, MII, MRI); +} + +extern "C" void LLVMInitializeHSAILTargetMC() { + for (Target *T : { &TheHSAIL_32Target, &TheHSAIL_64Target }) { + RegisterMCAsmInfo X(*T); + + TargetRegistry::RegisterMCCodeGenInfo(*T, createHSAILMCCodeGenInfo); + TargetRegistry::RegisterMCInstrInfo(*T, createHSAILMCInstrInfo); + TargetRegistry::RegisterMCRegInfo(*T, createHSAILMCRegisterInfo); + TargetRegistry::RegisterMCSubtargetInfo(*T, createHSAILMCSubtargetInfo); + TargetRegistry::RegisterMCInstPrinter(*T, createHSAILMCInstPrinter); + TargetRegistry::RegisterMCCodeEmitter(*T, createHSAILMCCodeEmitter); + TargetRegistry::RegisterELFStreamer(*T, createBRIGStreamer); +// TargetRegistry::RegisterObjectTargetStreamer(*T, createHSAILObjectTargetStreamer); + } + + TargetRegistry::RegisterMCAsmBackend(TheHSAIL_32Target, + 
createHSAIL32AsmBackend); + TargetRegistry::RegisterMCAsmBackend(TheHSAIL_64Target, + createHSAIL64AsmBackend); +} Index: lib/Target/HSAIL/MCTargetDesc/HSAILTargetStreamer.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILTargetStreamer.h @@ -0,0 +1,28 @@ +//===- HSAILTargetStreamer.h -------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HSAILTargetStreamer class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILTARGETSTREAMER_H +#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_HSAILTARGETSTREAMER_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +class HSAILTargetStreamer : public MCTargetStreamer { +public: + HSAILTargetStreamer(MCStreamer &S); + ~HSAILTargetStreamer(); +}; +} + +#endif Index: lib/Target/HSAIL/MCTargetDesc/HSAILTargetStreamer.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/HSAILTargetStreamer.cpp @@ -0,0 +1,28 @@ +//===- HSAILTargetStreamer.cpp ----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the HSAILTargetStreamer class. +// +//===----------------------------------------------------------------------===// +#include "HSAILTargetStreamer.h" + +using namespace llvm; + +HSAILTargetStreamer::HSAILTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} + +HSAILTargetStreamer::~HSAILTargetStreamer() {} + +class HSAILTargetAsmStreamer : public HSAILTargetStreamer { +public: + HSAILTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS); +}; + +HSAILTargetAsmStreamer::HSAILTargetAsmStreamer(MCStreamer &S, + formatted_raw_ostream &OS) + : HSAILTargetStreamer(S) {} Index: lib/Target/HSAIL/MCTargetDesc/LLVMBuild.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/HSAIL/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HSAILDesc +parent = HSAIL +required_libraries = MC HSAILInfo Support +add_to_library_groups = HSAIL Index: lib/Target/HSAIL/MCTargetDesc/RawVectorOstream.h =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/RawVectorOstream.h @@ -0,0 +1,58 @@ +//===-- RawVectorOstream.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Interface for RawVectorOstream which is used for capturing DWARF data from +/// MC +/// layer +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_RAWVECTOROSTREAM_H +#define LLVM_LIB_TARGET_HSAIL_MCTARGETDESC_RAWVECTOROSTREAM_H + +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { + +class RawVectorOstream : public raw_pwrite_stream { + static const size_t AllocationChunk = 1024; + std::vector Data; + std::vector::size_type Position; + raw_pwrite_stream *Other; + + void write_impl(const char *Ptr, size_t Size) override; + void pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) override; + + uint64_t current_pos() const override; + +public: + explicit RawVectorOstream(raw_pwrite_stream *other); + virtual ~RawVectorOstream(); + + StringRef getData() const { + return StringRef(Data.data(), Data.size()); + } + + void releaseStream(); + + // Other stream is the stream that is used to forward all data written to the + // instance of RawVectorOstream. If other stream is NULL then RawVectorOstream + // does not forward captured data to any other stream, it just stores captured + // data in the internal memory buffer. + raw_ostream *getOtherStream() { + return Other; + } + + void setOtherStream(raw_pwrite_stream *Other); +}; +} + +#endif Index: lib/Target/HSAIL/MCTargetDesc/RawVectorOstream.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/MCTargetDesc/RawVectorOstream.cpp @@ -0,0 +1,70 @@ +//===-- RawVectorOstream.cpp ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "RawVectorOstream.h" + +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void RawVectorOstream::write_impl(const char *Ptr, size_t Size) { + /* copy data to the internal buffer */ + if (Position + Size > Data.size()) { + /* allocate more memory if required */ + size_t nChunks = + (Position + Size + AllocationChunk - 1) / AllocationChunk; + Data.resize(nChunks * AllocationChunk); + } + + std::copy(Ptr, Ptr + Size, Data.begin() + Position); + Position += Size; + /* write data to the other stream, if any provided */ + if (Other) { + Other->write(Ptr, Size); + } +} + +void RawVectorOstream::pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) { + flush(); + memcpy(Data.data() + Offset, Ptr, Size); + + if (Other) + Other->pwrite(Ptr, Size, Offset); +} + +uint64_t RawVectorOstream::current_pos() const { + return static_cast(Position); +} + +RawVectorOstream::RawVectorOstream(raw_pwrite_stream *other) + : Data(AllocationChunk), Position(0), Other(other) {} + +RawVectorOstream::~RawVectorOstream() { + // make sure that releaseStream has been called before RawVectorOstream is + // deleted + assert(!Other); +#if 0 + if(Other) { + flush(); + Other->flush(); + } +#endif +} + +void RawVectorOstream::releaseStream() { + if (Other) { + Other->flush(); + } + Other = 0; +} + +void RawVectorOstream::setOtherStream(raw_pwrite_stream *other) { + releaseStream(); + Other = other; +} Index: lib/Target/HSAIL/TargetInfo/CMakeLists.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/TargetInfo/CMakeLists.txt @@ -0,0 +1,12 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) + +add_llvm_library(LLVMHSAILInfo + HSAILTargetInfo.cpp + ) + +add_dependencies(LLVMHSAILInfo + LLVMMC + LLVMSupport + LLVMTarget + HSAILCommonTableGen + ) Index: lib/Target/HSAIL/TargetInfo/HSAILTargetInfo.cpp =================================================================== --- /dev/null +++ lib/Target/HSAIL/TargetInfo/HSAILTargetInfo.cpp @@ -0,0 +1,27 @@ +//===-- HSAILTargetInfo.cpp - HSAIL Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "HSAIL.h" + +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +Target llvm::TheHSAIL_32Target; +Target llvm::TheHSAIL_64Target; + +extern "C" void LLVMInitializeHSAILTargetInfo() { + RegisterTarget Target32( + TheHSAIL_32Target, "hsail", + "32-bit HSAIL: small machine model, addresses are 32 bit"); + + RegisterTarget Target64( + TheHSAIL_64Target, "hsail64", + "64-bit HSAIL: large machine model, addresses are 64 bit"); +} Index: lib/Target/HSAIL/TargetInfo/LLVMBuild.txt =================================================================== --- /dev/null +++ lib/Target/HSAIL/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/HSAIL/TargetInfo/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HSAILInfo +parent = HSAIL +required_libraries = MC Support Target +add_to_library_groups = HSAIL Index: lib/Target/LLVMBuild.txt =================================================================== --- lib/Target/LLVMBuild.txt +++ lib/Target/LLVMBuild.txt @@ -25,6 +25,7 @@ BPF CppBackend Hexagon + HSAIL MSP430 NVPTX Mips Index: test/CodeGen/HSAIL/128bit-kernel-args.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/128bit-kernel-args.ll @@ -0,0 +1,35 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &v4i32_kernel_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: st_global +; HSAIL-DAG: st_global +; HSAIL-DAG: st_global +; HSAIL-DAG: st_global +; HSAIL: ret; +define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { +entry: + store <4 x i32> %in, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v4f32_kernel_args +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: st_global +; HSAIL-DAG: st_global +; HSAIL-DAG: st_global +; HSAIL-DAG: st_global +; HSAIL: ret; +define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) { +entry: + store <4 x float> %in, <4 x float> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/32-bit-local-address-space.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/32-bit-local-address-space.ll @@ -0,0 +1,116 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &local_address_load +; HSAIL: ld_group_align(4)_u32 {{\$s[0-9]}}, {{\[\$s[0-9]+\]}} +define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = load i32, i32 addrspace(3)* %in + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &local_address_gep +; HSAIL: ld_group_align(4)_u32 {{\$s[0-9]}}, {{\[\$s[0-9]+\]}} +define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) { +entry: + %0 = getelementptr i32, i32 addrspace(3)* %in, i32 %offset + %1 = load i32, i32 addrspace(3)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &local_address_gep_const_offset +; HSAIL: ld_group_align(4)_u32 {{\$s[0-9]}}, {{\[\$s[0-9]+\+4\]}} +define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = getelementptr i32, i32 addrspace(3)* %in, i32 1 + %1 = load i32, i32 addrspace(3)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; Offset too large, can't fold into 16-bit immediate offset. 
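+; (16385 i32 elements correspond to a byte offset of 16385 * 4 = 65540, which
+; needs more than 16 bits.)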
+; HSAIL-LABEL: {{^}}prog function &local_address_gep_large_const_offset +; HSAIL: ld_group_align(4)_u32 {{\$s[0-9]}}, {{\[\$s[0-9]+\+65540\]}} +define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { +entry: + %0 = getelementptr i32, i32 addrspace(3)* %in, i32 16385 + %1 = load i32, i32 addrspace(3)* %0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &null_32bit_lds_ptr +; HSAIL: cmp_ne_b1_s32 +; HSAIL-NOT: cmp_ne +; HSAIL: cmov_b32 +define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind { + %cmp = icmp ne i32 addrspace(3)* %lds, null + %x = select i1 %cmp, i32 123, i32 456 + store i32 %x, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &mul_32bit_ptr +; HSAIL: mul_u32 [[REG:\$s[0-9]+]], {{\$s[0-9]+}}, 12; +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]}}, {{\[}}[[REG]]{{\]}} +define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) { + %ptr = getelementptr [3 x float], [3 x float] addrspace(3)* %lds, i32 %tid, i32 0 + %val = load float, float addrspace(3)* %ptr + store float %val, float addrspace(1)* %out + ret void +} + +@g_lds = addrspace(3) global float zeroinitializer, align 4 + +; HSAIL-LABEL: {{^}}prog function &infer_ptr_alignment_global_offset +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]}}, [%g_lds]; +define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) { + %val = load float, float addrspace(3)* @g_lds + store float %val, float addrspace(1)* %out + ret void +} + + +@ptr = addrspace(3) global i32 addrspace(3)* null +@dst = addrspace(3) global [16384 x i32] zeroinitializer + +; HSAIL-LABEL: {{^}}prog function &global_ptr +; HSAIL: lda_group_u32 [[REG:\$s[0-9]+]], [%dst][64]; +; HSAIL: st_group_align(4)_u32 [[REG]], [%ptr] +define void @global_ptr() nounwind { + store i32 addrspace(3)* getelementptr ([16384 x i32], [16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr + ret void +} + +; HSAIL-LABEL: {{^}}prog function &local_address_store +; HSAIL: st_group_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}] +define void @local_address_store(i32 addrspace(3)* %out, i32 %val) { + store i32 %val, i32 addrspace(3)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &local_address_gep_store +; HSAIL: st_group_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}] +define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) { + %gep = getelementptr i32, i32 addrspace(3)* %out, i32 %offset + store i32 %val, i32 addrspace(3)* %gep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &local_address_gep_const_offset_store +; HSAIL: st_group_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+\+4}}] +define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) { + %gep = getelementptr i32, i32 addrspace(3)* %out, i32 1 + store i32 %val, i32 addrspace(3)* %gep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &local_address_gep_large_const_offset_store +; HSAIL: st_group_align(4)_u32 {{\$s[0-9]}}, {{\[\$s[0-9]+\+65540\]}} +define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) { + %gep = getelementptr i32, i32 addrspace(3)* %out, i32 16385 + store i32 %val, i32 addrspace(3)* %gep, align 4 + ret void +} Index: test/CodeGen/HSAIL/64bit-kernel-args.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/64bit-kernel-args.ll @@ -0,0 
+1,13 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &f64_kernel_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: ld_arg +; HSAIL-DAG: st_global +; HSAIL: ret; + +define void @f64_kernel_arg(double addrspace(1)* %out, double %in) { +entry: + store double %in, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/abs.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/abs.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &test_xor_abs_pat_i32( +; HSAIL: ld_arg_align(4)_u32 [[VAL:\$s[0-9]+]], [%x]; +; HSAIL: abs_s32 [[ABS:\$s[0-9]+]], [[VAL]]; +; HSAIL: st_arg_align(4)_u32 [[ABS]] +define i32 @test_xor_abs_pat_i32(i32 %x) #0 { + %sra = ashr i32 %x, 31 + %add = add i32 %sra, %x + %xor = xor i32 %sra, %add + ret i32 %xor +} + +; HSAIL-LABEL: {{^}}prog function &test_xor_abs_pat_i64( +; HSAIL: ld_arg_align(8)_u64 [[VAL:\$d[0-9]+]], [%x]; +; HSAIL: abs_s64 [[ABS:\$d[0-9]+]], [[VAL]]; +; HSAIL: st_arg_align(8)_u64 [[ABS]] +define i64 @test_xor_abs_pat_i64(i64 %x) #0 { + %sra = ashr i64 %x, 63 + %add = add i64 %sra, %x + %xor = xor i64 %sra, %add + ret i64 %xor +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/activelanecount.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/activelanecount.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.activelanecount(i32, i1) #1 + +declare i32 @llvm.HSAIL.activelanecount.u32.b1(i1) #0 +declare i32 @llvm.HSAIL.activelanecount.width.u32.b1(i1) #0 + +; HSAIL-LABEL: {{^}}prog function &test_activelanecount_u32( +; HSAIL: activelanecount_u32_b1 {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanecount_u32(i32 addrspace(1)* %out, i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call i32 @llvm.hsail.activelanecount(i32 1, i1 %cmp) #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanecount_wavesize_u32( +; HSAIL: activelanecount_width(WAVESIZE)_u32_b1 {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanecount_wavesize_u32(i32 addrspace(1)* %out, i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call i32 @llvm.hsail.activelanecount(i32 33, i1 %cmp) #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanecount_all_u32( +; HSAIL: activelanecount_width(all)_u32_b1 {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanecount_all_u32(i32 addrspace(1)* %out, i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call i32 @llvm.hsail.activelanecount(i32 34, i1 %cmp) #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanecount_u32_imm( +; HSAIL: activelanecount_u32_b1 {{\$s[0-9]+}}, 1; +define void @test_activelanecount_u32_imm(i32 addrspace(1)* %out) #0 { + %tmp = call i32 @llvm.hsail.activelanecount(i32 1, i1 true) #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelanecount_u32( +; HSAIL: activelanecount_u32_b1 {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_legacy_activelanecount_u32(i32 addrspace(1)* %out, i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call i32 @llvm.HSAIL.activelanecount.u32.b1(i1 %cmp) #0 + store i32 %tmp, i32 addrspace(1)* %out 
+ ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelanecount_width_u32( +; HSAIL: activelanecount_width(WAVESIZE)_u32_b1 {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_legacy_activelanecount_width_u32(i32 addrspace(1)* %out, i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call i32 @llvm.HSAIL.activelanecount.width.u32.b1(i1 %cmp) #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly convergent } Index: test/CodeGen/HSAIL/activelaneid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/activelaneid.ll @@ -0,0 +1,49 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.activelaneid(i32) #0 + +declare i32 @llvm.HSAIL.activelaneid.u32() #0 +declare i32 @llvm.HSAIL.activelaneid.width.u32() #0 + +; HSAIL-LABEL: {{^}}prog function &test_activelaneid_u32( +; HSAIL: activelaneid_u32 {{\$s[0-9]+}}; +define void @test_activelaneid_u32(i32 addrspace(1)* %out) #0 { + %tmp = call i32 @llvm.hsail.activelaneid(i32 1) #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelaneid_wavesize_u32( +; HSAIL: activelaneid_width(WAVESIZE)_u32 {{\$s[0-9]+}}; +define void @test_activelaneid_wavesize_u32(i32 addrspace(1)* %out) #0 { + %tmp = call i32 @llvm.hsail.activelaneid(i32 33) #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelaneid_all_u32( +; HSAIL: activelaneid_width(all)_u32 {{\$s[0-9]+}}; +define void @test_activelaneid_all_u32(i32 addrspace(1)* %out) #0 { + %tmp = call i32 @llvm.hsail.activelaneid(i32 34) #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelaneid_u32( +; HSAIL: activelaneid_u32 {{\$s[0-9]+}}; +define void @test_legacy_activelaneid_u32(i32 addrspace(1)* %out) #0 { + %tmp = call i32 @llvm.HSAIL.activelaneid.u32() #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelaneid_width_u32( +; HSAIL: activelaneid_width(WAVESIZE)_u32 {{\$s[0-9]+}}; +define void @test_legacy_activelaneid_width_u32(i32 addrspace(1)* %out) #0 { + %tmp = call i32 @llvm.HSAIL.activelaneid.width.u32() #0 + store i32 %tmp, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } Index: test/CodeGen/HSAIL/activelanemask.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/activelanemask.ll @@ -0,0 +1,129 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; These really return structs of 4 x i64 + +declare { i64, i64, i64, i64 } @llvm.hsail.activelanemask(i32, i1) #1 + +declare { i64, i64, i64, i64 } @llvm.HSAIL.activelanemask.v4.b64.b1(i1) #0 +declare { i64, i64, i64, i64 } @llvm.HSAIL.activelanemask.v4.width.b64.b1(i1) #0 + +; HSAIL-LABEL: {{^}}prog function &test_activelanemask_b64( +; HSAIL: activelanemask_v4_b64_b1 ({{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}), {{\$c[0-9]+}}; +define void @test_activelanemask_b64(i64 addrspace(1)* %out0, + i64 addrspace(1)* %out1, + i64 addrspace(1)* %out2, + i64 addrspace(1)* %out3, + i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call { i64, i64, i64, i64 } @llvm.hsail.activelanemask(i32 1, i1 %cmp) #0 + %elt0 = extractvalue { i64, i64, i64, i64 } %tmp, 0 + %elt1 = extractvalue { i64, 
i64, i64, i64 } %tmp, 1 + %elt2 = extractvalue { i64, i64, i64, i64 } %tmp, 2 + %elt3 = extractvalue { i64, i64, i64, i64 } %tmp, 3 + store i64 %elt0, i64 addrspace(1)* %out0 + store i64 %elt1, i64 addrspace(1)* %out1 + store i64 %elt2, i64 addrspace(1)* %out2 + store i64 %elt3, i64 addrspace(1)* %out3 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanemask_wavesize_b64( +; HSAIL: activelanemask_v4_width(WAVESIZE)_b64_b1 ({{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}), {{\$c[0-9]+}}; +define void @test_activelanemask_wavesize_b64(i64 addrspace(1)* %out0, + i64 addrspace(1)* %out1, + i64 addrspace(1)* %out2, + i64 addrspace(1)* %out3, + i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call { i64, i64, i64, i64 } @llvm.hsail.activelanemask(i32 33, i1 %cmp) #0 + %elt0 = extractvalue { i64, i64, i64, i64 } %tmp, 0 + %elt1 = extractvalue { i64, i64, i64, i64 } %tmp, 1 + %elt2 = extractvalue { i64, i64, i64, i64 } %tmp, 2 + %elt3 = extractvalue { i64, i64, i64, i64 } %tmp, 3 + store i64 %elt0, i64 addrspace(1)* %out0 + store i64 %elt1, i64 addrspace(1)* %out1 + store i64 %elt2, i64 addrspace(1)* %out2 + store i64 %elt3, i64 addrspace(1)* %out3 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanemask_b64_imm( +; HSAIL: activelanemask_v4_b64_b1 ({{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}), 1; +define void @test_activelanemask_b64_imm(i64 addrspace(1)* %out0, + i64 addrspace(1)* %out1, + i64 addrspace(1)* %out2, + i64 addrspace(1)* %out3) #0 { + %tmp = call { i64, i64, i64, i64 } @llvm.hsail.activelanemask(i32 1, i1 true) #0 + %elt0 = extractvalue { i64, i64, i64, i64 } %tmp, 0 + %elt1 = extractvalue { i64, i64, i64, i64 } %tmp, 1 + %elt2 = extractvalue { i64, i64, i64, i64 } %tmp, 2 + %elt3 = extractvalue { i64, i64, i64, i64 } %tmp, 3 + store i64 %elt0, i64 addrspace(1)* %out0 + store i64 %elt1, i64 addrspace(1)* %out1 + store i64 %elt2, i64 addrspace(1)* %out2 + store i64 %elt3, i64 addrspace(1)* %out3 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanemask_width_b64( +; HSAIL: activelanemask_v4_width(WAVESIZE)_b64_b1 ({{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}), {{\$c[0-9]+}}; +define void @test_activelanemask_width_b64(i64 addrspace(1)* %out0, + i64 addrspace(1)* %out1, + i64 addrspace(1)* %out2, + i64 addrspace(1)* %out3, + i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call { i64, i64, i64, i64 } @llvm.HSAIL.activelanemask.v4.width.b64.b1(i1 %cmp) #0 + %elt0 = extractvalue { i64, i64, i64, i64 } %tmp, 0 + %elt1 = extractvalue { i64, i64, i64, i64 } %tmp, 1 + %elt2 = extractvalue { i64, i64, i64, i64 } %tmp, 2 + %elt3 = extractvalue { i64, i64, i64, i64 } %tmp, 3 + store i64 %elt0, i64 addrspace(1)* %out0 + store i64 %elt1, i64 addrspace(1)* %out1 + store i64 %elt2, i64 addrspace(1)* %out2 + store i64 %elt3, i64 addrspace(1)* %out3 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelanemask_b64( +; HSAIL: activelanemask_v4_b64_b1 ({{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}), {{\$c[0-9]+}}; +define void @test_legacy_activelanemask_b64(i64 addrspace(1)* %out0, + i64 addrspace(1)* %out1, + i64 addrspace(1)* %out2, + i64 addrspace(1)* %out3, + i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call { i64, i64, i64, i64 } @llvm.HSAIL.activelanemask.v4.b64.b1(i1 %cmp) #0 + %elt0 = extractvalue { i64, i64, i64, i64 } %tmp, 0 + %elt1 = extractvalue { i64, i64, i64, i64 } %tmp, 1 + %elt2 = extractvalue { i64, i64, i64, i64 } %tmp, 2 + %elt3 = extractvalue { i64, i64, i64, i64 
} %tmp, 3 + store i64 %elt0, i64 addrspace(1)* %out0 + store i64 %elt1, i64 addrspace(1)* %out1 + store i64 %elt2, i64 addrspace(1)* %out2 + store i64 %elt3, i64 addrspace(1)* %out3 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelanemask_width_b64( +; HSAIL: activelanemask_v4_width(WAVESIZE)_b64_b1 ({{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}), {{\$c[0-9]+}}; +define void @test_legacy_activelanemask_width_b64(i64 addrspace(1)* %out0, + i64 addrspace(1)* %out1, + i64 addrspace(1)* %out2, + i64 addrspace(1)* %out3, + i32 %x) #0 { + %cmp = icmp eq i32 %x, 0 + %tmp = call { i64, i64, i64, i64 } @llvm.HSAIL.activelanemask.v4.width.b64.b1(i1 %cmp) #0 + %elt0 = extractvalue { i64, i64, i64, i64 } %tmp, 0 + %elt1 = extractvalue { i64, i64, i64, i64 } %tmp, 1 + %elt2 = extractvalue { i64, i64, i64, i64 } %tmp, 2 + %elt3 = extractvalue { i64, i64, i64, i64 } %tmp, 3 + store i64 %elt0, i64 addrspace(1)* %out0 + store i64 %elt1, i64 addrspace(1)* %out1 + store i64 %elt2, i64 addrspace(1)* %out2 + store i64 %elt3, i64 addrspace(1)* %out3 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly convergent } Index: test/CodeGen/HSAIL/activelanepermute.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/activelanepermute.ll @@ -0,0 +1,562 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.activelanepermute.i32(i32, i32, i32, i32, i1) #1 +declare i64 @llvm.hsail.activelanepermute.i64(i32, i64, i32, i64, i1) #1 + +declare i32 @llvm.HSAIL.activelanepermute.b32(i32, i32, i32, i1) #0 +declare i64 @llvm.HSAIL.activelanepermute.b64(i64, i32, i64, i1) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelanepermute_b32_rrrr( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_legacy_activelanepermute_b32_rrrr(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.HSAIL.activelanepermute.b32(i32 %src0, i32 %src1, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelanepermute_b64_rrrr( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_legacy_activelanepermute_b64_rrrr(i64 addrspace(1)* %out, i64 %src0, i32 %src1, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.HSAIL.activelanepermute.b64(i64 %src0, i32 %src1, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelanepermute_width_b32_rrrr( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_legacy_activelanepermute_width_b32_rrrr(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.HSAIL.activelanepermute.width.b32(i32 %src0, i32 %src1, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_activelanepermute_width_b64_rrrr( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_legacy_activelanepermute_width_b64_rrrr(i64 addrspace(1)* %out, i64 %src0, i32 %src1, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 
@llvm.HSAIL.activelanepermute.width.b64(i64 %src0, i32 %src1, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; -------------------------------------------------------------------------------- + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_iiii( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, 0, 0, 0, 0; +define void @test_activelanepermute_b32_iiii(i32 addrspace(1)* %out) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 0, i32 0, i32 0, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_rrrr( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_b32_rrrr(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 %src0, i32 %src1, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_irrr( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, 10, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_b32_irrr(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 10, i32 %src1, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_rirr( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_b32_rirr(i32 addrspace(1)* %out, i32 %src0, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 %src0, i32 7, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_rrir( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 3, {{\$c[0-9]+}}; +define void @test_activelanepermute_b32_rrir(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 %src0, i32 %src1, i32 3, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_rrri( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1; +define void @test_activelanepermute_b32_rrri(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 %src0, i32 %src1, i32 %src2, i1 true) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_iirr( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, 9, 13, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_b32_iirr(i32 addrspace(1)* %out, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 9, i32 13, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_riir( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 13, 56, {{\$c[0-9]+}}; +define void @test_activelanepermute_b32_riir(i32 addrspace(1)* %out, i32 %src0, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 %src0, i32 13, i32 56, i1 %src3) #0 + store i32 %tmp0, i32 
addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_rrii( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 23, 1; +define void @test_activelanepermute_b32_rrii(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 %src0, i32 %src1, i32 23, i1 true) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_irir( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, 11, {{\$s[0-9]+}}, 43, {{\$c[0-9]+}}; +define void @test_activelanepermute_b32_irir(i32 addrspace(1)* %out, i32 %src1, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 11, i32 %src1, i32 43, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_riri( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 11, {{\$s[0-9]+}}, 0; +define void @test_activelanepermute_b32_riri(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 %src0, i32 11, i32 %src2, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_irri( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, 17, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0; +define void @test_activelanepermute_b32_irri(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 17, i32 %src1, i32 %src2, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_iiir( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, 11, 9, 43, {{\$c[0-9]+}}; +define void @test_activelanepermute_b32_iiir(i32 addrspace(1)* %out, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 11, i32 9, i32 43, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_riii( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 11, 22, 0; +define void @test_activelanepermute_b32_riii(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 %src0, i32 11, i32 22, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_iiri( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, 3, 11, {{\$s[0-9]+}}, 0; +define void @test_activelanepermute_b32_iiri(i32 addrspace(1)* %out, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 3, i32 11, i32 %src2, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b32_irii( +; HSAIL: activelanepermute_b32 {{\$s[0-9]+}}, 11, {{\$s[0-9]+}}, 43, 1; +define void @test_activelanepermute_b32_irii(i32 addrspace(1)* %out, i32 %src1) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 1, i32 11, i32 %src1, i32 43, i1 true) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_iiii( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, 0, 0, 0, 0; +define void @test_activelanepermute_b64_iiii(i64 addrspace(1)* %out) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 0, i32 0, i64 0, i1 false) #0 + store i64 %tmp0, i64 
addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_rrrr( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_b64_rrrr(i64 addrspace(1)* %out, i64 %src0, i32 %src1, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 %src0, i32 %src1, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_irrr( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, 10, {{\$s[0-9]+}}, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_b64_irrr(i64 addrspace(1)* %out, i32 %src1, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 10, i32 %src1, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_rirr( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 7, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_b64_rirr(i64 addrspace(1)* %out, i64 %src0, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 %src0, i32 7, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_rrir( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, 3, {{\$c[0-9]+}}; +define void @test_activelanepermute_b64_rrir(i64 addrspace(1)* %out, i64 %src0, i32 %src1, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 %src0, i32 %src1, i64 3, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_rrri( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}, 1; +define void @test_activelanepermute_b64_rrri(i64 addrspace(1)* %out, i64 %src0, i32 %src1, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 %src0, i32 %src1, i64 %src2, i1 true) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_iirr( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, 9, 13, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_b64_iirr(i64 addrspace(1)* %out, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 9, i32 13, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_riir( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 13, 56, {{\$c[0-9]+}}; +define void @test_activelanepermute_b64_riir(i64 addrspace(1)* %out, i64 %src0, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 %src0, i32 13, i64 56, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_rrii( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, 23, 1; +define void @test_activelanepermute_b64_rrii(i64 addrspace(1)* %out, i64 %src0, i32 %src1) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 %src0, i32 %src1, i64 23, i1 true) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_irir( +; HSAIL: activelanepermute_b64 
{{\$d[0-9]+}}, 11, {{\$s[0-9]+}}, 43, {{\$c[0-9]+}}; +define void @test_activelanepermute_b64_irir(i64 addrspace(1)* %out, i32 %src1, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 11, i32 %src1, i64 43, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_riri( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 11, {{\$d[0-9]+}}, 0; +define void @test_activelanepermute_b64_riri(i64 addrspace(1)* %out, i64 %src0, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 %src0, i32 11, i64 %src2, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_irri( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, 17, {{\$s[0-9]+}}, {{\$d[0-9]+}}, 0; +define void @test_activelanepermute_b64_irri(i64 addrspace(1)* %out, i32 %src1, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 17, i32 %src1, i64 %src2, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_iiir( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, 11, 9, 43, {{\$c[0-9]+}}; +define void @test_activelanepermute_b64_iiir(i64 addrspace(1)* %out, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 11, i32 9, i64 43, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_riii( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 11, 22, 0; +define void @test_activelanepermute_b64_riii(i64 addrspace(1)* %out, i64 %src0, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 %src0, i32 11, i64 22, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_iiri( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, 3, 11, {{\$d[0-9]+}}, 0; +define void @test_activelanepermute_b64_iiri(i64 addrspace(1)* %out, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 3, i32 11, i64 %src2, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_b64_irii( +; HSAIL: activelanepermute_b64 {{\$d[0-9]+}}, 11, {{\$s[0-9]+}}, 43, 1; +define void @test_activelanepermute_b64_irii(i64 addrspace(1)* %out, i32 %src1) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 1, i64 11, i32 %src1, i64 43, i1 true) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; -------------------------------------------------------------------------------- + +declare i32 @llvm.HSAIL.activelanepermute.width.b32(i32, i32, i32, i1) #0 +declare i64 @llvm.HSAIL.activelanepermute.width.b64(i64, i32, i64, i1) #0 + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_iiii( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, 0, 0, 0, 0; +define void @test_activelanepermute_wavesize_b32_iiii(i32 addrspace(1)* %out) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 0, i32 0, i32 0, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_rrrr( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void 
@test_activelanepermute_wavesize_b32_rrrr(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 %src0, i32 %src1, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_irrr( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, 10, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b32_irrr(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 10, i32 %src1, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_rirr( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b32_rirr(i32 addrspace(1)* %out, i32 %src0, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 %src0, i32 7, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_rrir( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 3, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b32_rrir(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 %src0, i32 %src1, i32 3, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_rrri( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1; +define void @test_activelanepermute_wavesize_b32_rrri(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 %src0, i32 %src1, i32 %src2, i1 true) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_iirr( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, 9, 13, {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b32_iirr(i32 addrspace(1)* %out, i32 %src2, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 9, i32 13, i32 %src2, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_riir( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 13, 56, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b32_riir(i32 addrspace(1)* %out, i32 %src0, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 %src0, i32 13, i32 56, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_rrii( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 23, 1; +define void @test_activelanepermute_wavesize_b32_rrii(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 %src0, i32 %src1, i32 23, i1 true) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function 
&test_activelanepermute_wavesize_b32_irir( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, 11, {{\$s[0-9]+}}, 43, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b32_irir(i32 addrspace(1)* %out, i32 %src1, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 11, i32 %src1, i32 43, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_riri( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 11, {{\$s[0-9]+}}, 0; +define void @test_activelanepermute_wavesize_b32_riri(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 %src0, i32 11, i32 %src2, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_irri( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, 17, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0; +define void @test_activelanepermute_wavesize_b32_irri(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 17, i32 %src1, i32 %src2, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_iiir( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, 11, 9, 43, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b32_iiir(i32 addrspace(1)* %out, i1 %src3) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 11, i32 9, i32 43, i1 %src3) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_riii( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 11, 22, 0; +define void @test_activelanepermute_wavesize_b32_riii(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 %src0, i32 11, i32 22, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_iiri( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, 3, 11, {{\$s[0-9]+}}, 0; +define void @test_activelanepermute_wavesize_b32_iiri(i32 addrspace(1)* %out, i32 %src2) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 3, i32 11, i32 %src2, i1 false) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b32_irii( +; HSAIL: activelanepermute_width(WAVESIZE)_b32 {{\$s[0-9]+}}, 11, {{\$s[0-9]+}}, 43, 1; +define void @test_activelanepermute_wavesize_b32_irii(i32 addrspace(1)* %out, i32 %src1) #0 { + %tmp0 = call i32 @llvm.hsail.activelanepermute.i32(i32 33, i32 11, i32 %src1, i32 43, i1 true) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_iiii( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, 0, 0, 0, 0; +define void @test_activelanepermute_wavesize_b64_iiii(i64 addrspace(1)* %out) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 0, i32 0, i64 0, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_rrrr( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, 
{{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b64_rrrr(i64 addrspace(1)* %out, i64 %src0, i32 %src1, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 %src0, i32 %src1, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_irrr( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, 10, {{\$s[0-9]+}}, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b64_irrr(i64 addrspace(1)* %out, i32 %src1, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 10, i32 %src1, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_rirr( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 7, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b64_rirr(i64 addrspace(1)* %out, i64 %src0, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 %src0, i32 7, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_rrir( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, 3, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b64_rrir(i64 addrspace(1)* %out, i64 %src0, i32 %src1, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 %src0, i32 %src1, i64 3, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_rrri( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}, 1; +define void @test_activelanepermute_wavesize_b64_rrri(i64 addrspace(1)* %out, i64 %src0, i32 %src1, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 %src0, i32 %src1, i64 %src2, i1 true) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_iirr( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, 9, 13, {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b64_iirr(i64 addrspace(1)* %out, i64 %src2, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 9, i32 13, i64 %src2, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_riir( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 13, 56, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b64_riir(i64 addrspace(1)* %out, i64 %src0, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 %src0, i32 13, i64 56, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_rrii( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, 23, 1; +define void @test_activelanepermute_wavesize_b64_rrii(i64 addrspace(1)* %out, i64 %src0, i32 %src1) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 %src0, i32 %src1, i64 23, i1 true) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog 
function &test_activelanepermute_wavesize_b64_irir( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, 11, {{\$s[0-9]+}}, 43, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b64_irir(i64 addrspace(1)* %out, i32 %src1, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 11, i32 %src1, i64 43, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_riri( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 11, {{\$d[0-9]+}}, 0; +define void @test_activelanepermute_wavesize_b64_riri(i64 addrspace(1)* %out, i64 %src0, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 %src0, i32 11, i64 %src2, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_irri( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, 17, {{\$s[0-9]+}}, {{\$d[0-9]+}}, 0; +define void @test_activelanepermute_wavesize_b64_irri(i64 addrspace(1)* %out, i32 %src1, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 17, i32 %src1, i64 %src2, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_iiir( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, 11, 9, 43, {{\$c[0-9]+}}; +define void @test_activelanepermute_wavesize_b64_iiir(i64 addrspace(1)* %out, i1 %src3) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 11, i32 9, i64 43, i1 %src3) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_riii( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 11, 22, 0; +define void @test_activelanepermute_wavesize_b64_riii(i64 addrspace(1)* %out, i64 %src0, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 %src0, i32 11, i64 22, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_iiri( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, 3, 11, {{\$d[0-9]+}}, 0; +define void @test_activelanepermute_wavesize_b64_iiri(i64 addrspace(1)* %out, i64 %src2) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 3, i32 11, i64 %src2, i1 false) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_activelanepermute_wavesize_b64_irii( +; HSAIL: activelanepermute_width(WAVESIZE)_b64 {{\$d[0-9]+}}, 11, {{\$s[0-9]+}}, 43, 1; +define void @test_activelanepermute_wavesize_b64_irii(i64 addrspace(1)* %out, i32 %src1) #0 { + %tmp0 = call i64 @llvm.hsail.activelanepermute.i64(i32 33, i64 11, i32 %src1, i64 43, i1 true) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly convergent } Index: test/CodeGen/HSAIL/add.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/add.ll @@ -0,0 +1,179 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.HSAIL.get.global.id(i32) readnone + +; FUNC-LABEL: {{^}}prog function &test1 +; HSAIL: add_u32 +define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 
addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: prog function &test2 +; HSAIL: add_u32 +; HSAIL: add_u32 +define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1)* %in + %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr + %result = add <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: prog function &test4 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1)* %in + %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr + %result = add <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: prog function &test8 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { +entry: + %0 = add <8 x i32> %a, %b + store <8 x i32> %0, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: prog function &test16 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +; HSAIL: add_u32 +define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { +entry: + %0 = add <16 x i32> %a, %b + store <16 x i32> %0, <16 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: prog function &add64 +; HSAIL: add_u64 +define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +entry: + %0 = add i64 %a, %b + store i64 %0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &add_r_i_i32 +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 123; +define void @add_r_i_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %a = load i32, i32 addrspace(1)* %in + %result = add i32 %a, 123 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &add_r_i_i64 +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 123; +define void @add_r_i_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %a = load i64, i64 addrspace(1)* %in + %result = add i64 %a, 123 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_vreg +; HSAIL: workitemabsid_u32 {{\$s[0-9]+}}, 0; +; HSAIL: shl_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 3; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) readnone + %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid + %a = load i64, i64 addrspace(1)* %a_ptr + %b = load i64, i64 addrspace(1)* %b_ptr + %result = add i64 %a, %b + store i64 %result, 
i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &sgpr_operand +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) { + %foo = load i64, i64 addrspace(1)* %in, align 8 + %result = add i64 %foo, %a + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &sgpr_operand_reversed +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) { + %foo = load i64, i64 addrspace(1)* %in, align 8 + %result = add i64 %a, %foo + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_v2i64_sreg +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) { + %result = add <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_v2i64_vreg +; HSAIL: workitemabsid_u32 {{\$s[0-9]+}}, 0; +; HSAIL: shl_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 4; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) readnone + %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid + %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr + %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr + %result = add <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &trunc_i64_add_to_i32 +; HSAIL: add_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { + %add = add i64 %b, %a + %trunc = trunc i64 %add to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 8 + ret void +} Index: test/CodeGen/HSAIL/addressing-modes.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/addressing-modes.ll @@ -0,0 +1,157 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.HSAIL.workitemid.flat() #0 + +@lds_global = addrspace(3) global [128 x i32] undef, align 4 + +; FUNC-LABEL: {{^}}prog function &directly_load_arg +; HSAIL: ld_arg_align(4)_u32 [[ADDRREG:\$s[0-9]]], [%in]; +; HSAIL: ld_global_align(4)_u32 [[REG:\$s[0-9]]], {{\[}}[[ADDRREG]]{{\]}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +; HSAIL: ret; +define void @directly_load_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %a = load i32, i32 addrspace(1)* %in + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &positive_imm_offset_arg +; HSAIL: ld_arg_align(4)_u32 [[ADDRREG:\$s[0-9]]], [%in]; +; HSAIL: ld_global_align(4)_u32 [[REG:\$s[0-9]]], 
{{\[}}[[ADDRREG]]+28{{\]}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +; HSAIL: ret; +define void @positive_imm_offset_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 7 + %a = load i32, i32 addrspace(1)* %gep + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &negative_imm_offset_arg +; HSAIL: ld_arg_align(4)_u32 [[ADDRREG:\$s[0-9]]], [%in]; +; HSAIL: ld_global_align(4)_u32 [[REG:\$s[0-9]]], {{\[}}[[ADDRREG]]-36{{\]}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +; HSAIL: ret; +define void @negative_imm_offset_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 -9 + %a = load i32, i32 addrspace(1)* %gep + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &directly_load_kernarg +; HSAIL: ld_kernarg_align(4)_width(all)_u32 [[KERNARG:\$s[0-9]]], [%in]; +; HSAIL: ld_global_align(4)_u32 [[REG:\$s[0-9]]], {{\[}}[[KERNARG]]{{\]}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +define spir_kernel void @directly_load_kernarg(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %a = load i32, i32 addrspace(1)* %in + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &positive_imm_offset_kernarg +; HSAIL: ld_kernarg_align(4)_width(all)_u32 [[KERNARG:\$s[0-9]]], [%in]; +; HSAIL: ld_global_align(4)_u32 [[REG:\$s[0-9]]], {{\[}}[[KERNARG]]+28{{\]}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +define spir_kernel void @positive_imm_offset_kernarg(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 7 + %a = load i32, i32 addrspace(1)* %gep + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &negative_imm_offset_kernarg +; HSAIL: ld_kernarg_align(4)_width(all)_u32 [[KERNARG:\$s[0-9]]], [%in]; +; HSAIL: ld_global_align(4)_u32 [[REG:\$s[0-9]]], {{\[}}[[KERNARG]]-36{{\]}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +define spir_kernel void @negative_imm_offset_kernarg(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 -9 + %a = load i32, i32 addrspace(1)* %gep + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &directly_load_global +; HSAIL: group_u32 %lds_global[128]; +; HSAIL: ld_group_align(4)_u32 [[REG:\$s[0-9]]], [%lds_global]; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +define void @directly_load_global(i32 addrspace(1)* %out) { + %a = load i32, i32 addrspace(3)* getelementptr inbounds ([128 x i32], [128 x i32] addrspace(3)* @lds_global, i32 0, i32 0) + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &positive_imm_offset_global +; HSAIL: group_u32 %lds_global[128]; +; HSAIL: ld_group_align(4)_u32 [[REG:\$s[0-9]]], [%lds_global][28]; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +define void @positive_imm_offset_global(i32 addrspace(1)* %out) { + %a = load i32, i32 addrspace(3)* getelementptr inbounds ([128 x i32], [128 x i32] addrspace(3)* @lds_global, i32 0, i32 7) + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &negative_imm_offset_global +; HSAIL: group_u32 
%lds_global[128]; +; HSAIL: lda_group_u32 [[REG:\$s[0-9]]], [%lds_global][-36]; +; HSAIL: st_global_align(4)_u32 [[REG]], +define void @negative_imm_offset_global(i32 addrspace(3)* addrspace(1)* %out) { + %gep = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @lds_global, i32 0, i32 -9 + store i32 addrspace(3)* %gep, i32 addrspace(3)* addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &reg_offset_global +; HSAIL: group_u32 %lds_global[128]; +; HSAIL: workitemflatid_u32 [[TID:\$s[0-9]+]]; +; HSAIL: ld_group_align(4)_u32 [[REG:\$s[0-9]]], [%lds_global]{{\[}}[[TID]]{{\]}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +define void @reg_offset_global(i32 addrspace(1)* %out) { + %tid = call i32 @llvm.HSAIL.workitemid.flat() #0 + %gep = getelementptr [128 x i32], [128 x i32] addrspace(3)* @lds_global, i32 0, i32 %tid + %a = load i32, i32 addrspace(3)* %gep + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &reg_offset_positive_offset_global +; HSAIL: group_u32 %lds_global[128]; +; HSAIL: workitemflatid_u32 [[TID:\$s[0-9]+]]; +; HSAIL: ld_group_align(4)_u32 [[REG:\$s[0-9]]], [%lds_global]{{\[}}[[TID]]+48]; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +define void @reg_offset_positive_offset_global(i32 addrspace(1)* %out) { + %tid = call i32 @llvm.HSAIL.workitemid.flat() #0 + %tid_offset = add i32 %tid, 12 + %gep = getelementptr [128 x i32], [128 x i32] addrspace(3)* @lds_global, i32 0, i32 %tid_offset + %a = load i32, i32 addrspace(3)* %gep + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &reg_offset_negative_offset_global +; HSAIL: group_u32 %lds_global[128]; +; HSAIL: workitemflatid_u32 [[TID:\$s[0-9]+]]; +; HSAIL: ld_group_align(4)_u32 [[REG:\$s[0-9]]], [%lds_global]{{\[}}[[TID]]-60]; +; HSAIL: add_u32 {{\$s[0-9]+}}, [[REG]], [[REG]]; +define void @reg_offset_negative_offset_global(i32 addrspace(1)* %out) { + %tid = call i32 @llvm.HSAIL.workitemid.flat() #0 + %tid_offset = add i32 %tid, -15 + %gep = getelementptr [128 x i32], [128 x i32] addrspace(3)* @lds_global, i32 0, i32 %tid_offset + %a = load i32, i32 addrspace(3)* %gep + %result = add i32 %a, %a + store i32 %result, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/addrspacecast.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/addrspacecast.ll @@ -0,0 +1,125 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL32 -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL64 -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_global_to_flat( +; HSAIL32: ld_arg_align(4)_u32 [[PTR:\$s[0-9]]], [%ptr]; +; HSAIL32-NEXT: st_arg_align(4)_u32 [[PTR]] + +; HSAIL64: ld_arg_align(8)_u64 [[PTR:\$d[0-9]]], [%ptr]; +; HSAIL64-NEXT: st_arg_align(8)_u64 [[PTR]] +define i8 addrspace(4)* @test_addrspacecast_global_to_flat(i8 addrspace(1)* %ptr) #0 { + %val = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)* + ret i8 addrspace(4)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_flat_to_global( +; HSAIL32: ld_arg_align(4)_u32 [[PTR:\$s[0-9]]], [%ptr]; +; HSAIL32-NEXT: st_arg_align(4)_u32 [[PTR]] + +; HSAIL64: ld_arg_align(8)_u64 [[PTR:\$d[0-9]]], [%ptr]; +; HSAIL64-NEXT: st_arg_align(8)_u64 [[PTR]] +define i8 
addrspace(1)* @test_addrspacecast_flat_to_global(i8 addrspace(4)* %ptr) #0 { + %val = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(1)* + ret i8 addrspace(1)* %val +} + +; FIXME: Immediate should be stored directly +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_global_to_flat_null_imm( +; HSAIL32: mov_b32 [[ZERO:\$s[0-9]+]], 0; +; HSAIL32-NEXT: st_arg_align(4)_u32 [[ZERO]] + +; HSAIL64: mov_b64 [[ZERO:\$d[0-9]+]], 0; +; HSAIL64: st_arg_align(8)_u64 [[ZERO]] +define i8 addrspace(4)* @test_addrspacecast_global_to_flat_null_imm() #0 { + %val = addrspacecast i8 addrspace(1)* null to i8 addrspace(4)* + ret i8 addrspace(4)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_global_to_flat_ptrtoint_inttoptr( +; HSAIL32: mov_b32 [[PTR:\$s[0-9]+]], 12345; +; HSAIL32-NEXT: st_arg_align(4)_u32 [[PTR]] + +; HSAIL64: mov_b64 [[PTR:\$d[0-9]+]], 12345; +; HSAIL64-NEXT: st_arg_align(8)_u64 [[PTR]] +define i8 addrspace(4)* @test_addrspacecast_global_to_flat_ptrtoint_inttoptr() #0 { + %ptr = inttoptr i32 12345 to i8 addrspace(1)* + %val = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(4)* + ret i8 addrspace(4)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_flat_to_global_inttoptr( +; HSAIL32: mov_b32 [[PTR:\$s[0-9]+]], 12345; +; HSAIL32-NEXT: st_arg_align(4)_u32 [[PTR]] + +; HSAIL64: mov_b64 [[PTR:\$d[0-9]+]], 12345; +; HSAIL64-NEXT: st_arg_align(8)_u64 [[PTR]] +define i8 addrspace(1)* @test_addrspacecast_flat_to_global_inttoptr() #0 { + %ptr = inttoptr i32 12345 to i8 addrspace(4)* + %val = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(1)* + ret i8 addrspace(1)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_flat_to_global_null_imm( +; HSAIL32: mov_b32 [[PTR:\$s[0-9]+]], 0; +; HSAIL32-NEXT: st_arg_align(4)_u32 [[PTR]] + +; HSAIL64: mov_b64 [[PTR:\$d[0-9]+]], 0; +; HSAIL64-NEXT: st_arg_align(8)_u64 [[PTR]] +define i8 addrspace(1)* @test_addrspacecast_flat_to_global_null_imm() #0 { + %val = addrspacecast i8 addrspace(4)* null to i8 addrspace(1)* + ret i8 addrspace(1)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_group_to_flat( +; HSAIL32: ld_arg_align(4)_u32 [[PTR:\$s[0-9]]], [%ptr]; +; HSAIL32: stof_group_u32_u32 [[CAST:\$s[0-9]+]], [[PTR]]; +; HSAIL32: st_arg_align(4)_u32 [[CAST]] + +; HSAIL64: ld_arg_align(4)_u32 [[PTR:\$s[0-9]]], [%ptr]; +; HSAIL64: stof_group_u64_u32 [[CAST:\$d[0-9]+]], [[PTR]]; +; HSAIL64: st_arg_align(8)_u64 [[CAST]] +define i8 addrspace(4)* @test_addrspacecast_group_to_flat(i8 addrspace(3)* %ptr) #0 { + %val = addrspacecast i8 addrspace(3)* %ptr to i8 addrspace(4)* + ret i8 addrspace(4)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_flat_to_group( +; HSAIL32: ld_arg_align(4)_u32 [[PTR:\$s[0-9]]], [%ptr]; +; HSAIL32: ftos_group_u32_u32 [[CAST:\$s[0-9]+]], [[PTR]]; +; HSAIL32: st_arg_align(4)_u32 [[CAST]] + +; HSAIL64: ld_arg_align(8)_u64 [[PTR:\$d[0-9]]], [%ptr]; +; HSAIL64: ftos_group_u32_u64 [[CAST:\$s[0-9]+]], [[PTR]]; +; HSAIL64: st_arg_align(4)_u32 [[CAST]] +define i8 addrspace(3)* @test_addrspacecast_flat_to_group(i8 addrspace(4)* %ptr) #0 { + %val = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(3)* + ret i8 addrspace(3)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_private_to_flat( +; HSAIL32: ld_arg_align(4)_u32 [[PTR:\$s[0-9]]], [%ptr]; +; HSAIL32: stof_private_u32_u32 [[CAST:\$s[0-9]+]], [[PTR]]; +; HSAIL32: st_arg_align(4)_u32 [[CAST]] + +; HSAIL64: ld_arg_align(4)_u32 [[PTR:\$s[0-9]]], [%ptr]; +; HSAIL64: stof_private_u64_u32 
[[CAST:\$d[0-9]+]], [[PTR]]; +; HSAIL64: st_arg_align(8)_u64 [[CAST]] +define i8 addrspace(4)* @test_addrspacecast_private_to_flat(i8* %ptr) #0 { + %val = addrspacecast i8* %ptr to i8 addrspace(4)* + ret i8 addrspace(4)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_addrspacecast_flat_to_private( +; HSAIL32: ld_arg_align(4)_u32 [[PTR:\$s[0-9]]], [%ptr]; +; HSAIL32: ftos_private_u32_u32 [[CAST:\$s[0-9]+]], [[PTR]]; +; HSAIL32: st_arg_align(4)_u32 [[CAST]] + +; HSAIL64: ld_arg_align(8)_u64 [[PTR:\$d[0-9]]], [%ptr]; +; HSAIL64: ftos_private_u32_u64 [[CAST:\$s[0-9]+]], [[PTR]]; +; HSAIL64: st_arg_align(4)_u32 [[CAST]] +define i8* @test_addrspacecast_flat_to_private(i8 addrspace(4)* %ptr) #0 { + %val = addrspacecast i8 addrspace(4)* %ptr to i8* + ret i8* %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/and1.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/and1.ll @@ -0,0 +1,151 @@ +; RUN: llc -march=hsail -filetype=asm < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test2 +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1) * %in + %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr + %result = and <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test4 +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1) * %in + %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr + %result = and <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &s_and_i32 +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %and = and i32 %a, %b + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &s_and_constant_i32 +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1234567; +define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) { + %and = and i32 %a, 1234567 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_and_i32 +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) { + %a = load i32, i32 addrspace(1)* %aptr, align 4 + %b = load i32, i32 addrspace(1)* %bptr, align 4 + %and = and i32 %a, %b + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_and_constant_i32 +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1234567; +define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) { + %a = load i32, i32 addrspace(1)* %aptr, align 4 + %and = and i32 %a, 1234567 + store i32 %and, i32 addrspace(1)* %out, align 4 + ret void +} + +; 
FUNC-LABEL: {{^}}prog function &s_and_i64 +; HSAIL: and_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { + %and = and i64 %a, %b + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &s_and_i1 +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1; +define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) { + %and = and i1 %a, %b + store i1 %and, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &s_and_constant_i64 +; HSAIL: and_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 281474976710655 +define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) { + %and = and i64 %a, 281474976710655 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_and_i64 +; HSAIL: and_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) { + %a = load i64, i64 addrspace(1)* %aptr, align 8 + %b = load i64, i64 addrspace(1)* %bptr, align 8 + %and = and i64 %a, %b + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_and_i64_br +; HSAIL-DAG: mov_b64 {{\$d[0-9]+}}, 0; +; HSAIL-DAG: cmp_ne_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL: cbr_b1 {{\$c[0-9]+}}, @BB10_2; + +; HSAIL: ld_global_align(8)_u64 {{\$d[0-9]+}}, [{{\$s[0-9]+}}]; +; HSAIL: ld_global_align(8)_u64 {{\$d[0-9]+}}, [{{\$s[0-9]+}}]; +; HSAIL: and_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; + +; HSAIL: {{^}}@BB10_2: +; HSAIL: st_global_align(8)_u64 {{\$d[0-9]+}}, [{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i32 %cond) { +entry: + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %if, label %endif + +if: + %a = load i64, i64 addrspace(1)* %aptr, align 8 + %b = load i64, i64 addrspace(1)* %bptr, align 8 + %and = and i64 %a, %b + br label %endif + +endif: + %tmp1 = phi i64 [%and, %if], [0, %entry] + store i64 %tmp1, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_and_constant_i64 +; HSAIL: and_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 1234567; +define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { + %a = load i64, i64 addrspace(1)* %aptr, align 8 + %and = and i64 %a, 1234567 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL:{{^}}prog function &and_i1 +; HSAIL: cmp_ge_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000; +; HSAIL: cmp_ge_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000; +; HSAIL: and_b1 {{\$c[0-9]+}}, {{\$c[0-9]+}}, {{\$c[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @and_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { + %a = load float, float addrspace(1) * %in0 + %b = load float, float addrspace(1) * %in1 + %acmp = fcmp oge float %a, 0.000000e+00 + %bcmp = fcmp oge float %b, 0.000000e+00 + %or = and i1 %acmp, %bcmp + %result = select i1 %or, float %a, float %b + store float %result, float addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/anonymous-gv-names.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/anonymous-gv-names.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL: prog 
readonly_u32 &__unnamed_1[4] = u32[](5, 4, 432, 3); +; HSAIL: prog readonly_u32 &__unnamed_2[4] = u32[](1, 42, 432, 99); +; HSAIL: prog readonly_f32 &__unnamed_3[4] = f32[](0F3f800000, 0F40800000, 0F45800000, 0F41000000); + + +; HSAIL: decl prog function &__unnamed_4()(arg_u32 %arg_p0); +; HSAIL: decl prog function &__unnamed_5()(); +; HSAIL: decl prog function &__unnamed_6()(); +; HSAIL: decl prog function &call_alias()(); +; HSAIL: decl prog function &__unnamed_7()(); + + +@0 = addrspace(2) global [4 x i32] [ i32 5, i32 4, i32 432, i32 3 ] +@1 = addrspace(2) global [4 x i32] [ i32 1, i32 42, i32 432, i32 99 ] +@2 = addrspace(2) constant [4 x float] [ float 1.0, float 4.0, float 4096.0, float 8.0 ] + +declare void @3(i32) + +; HSAIL-LABEL: {{^}}prog function &foo()() +; HSAIL: call &__unnamed_4 () (%__param_p0); +define void @foo() { + call void @3(i32 1) + ret void +} + +; HSAIL-LABEL: {{^}}prog function &__unnamed_5()() +; HSAIL: call &__unnamed_4 () (%__param_p0); +define void @4() { + call void @3(i32 1) + ret void +} + +; HSAIL-LABEL: {{^}}prog function &__unnamed_6()() +; HSAIL: ret; +define void @5() { + ret void +} + +; HSAIL-LABEL: {{^}}prog kernel &__unnamed_8() +; HSAIL: ret; +define spir_kernel void @6() { + ret void +} + +@falias = alias void ()* @5 + +; HSAIL-LABEL: {{^}}prog function &call_alias()() +; HSAIL: call &falias () (); +define void @call_alias() nounwind { + call void @falias() + ret void +} + +; HSAIL-LABEL: {{^}}prog function &__unnamed_7()() +; HSAIL: // BB#0: // %named_entry_block +; HSAIL: ret; +define void @7() { +named_entry_block: + ret void +} Index: test/CodeGen/HSAIL/anonymous-parameters.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/anonymous-parameters.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL: decl prog function &foo0(arg_u32 %ret)(arg_u32 %arg_p0); + +; HSAIL: decl prog function &foo1(arg_u32 %ret)( +; HSAIL-NEXT: arg_u32 %arg_p0, +; HSAIL-NEXT: arg_u32 %arg_p1); + +; HSAIL: decl prog function &foo2(arg_u32 %ret)( +; HSAIL-NEXT: arg_u32 %arg_p0, +; HSAIL-NEXT: arg_u32 %x, +; HSAIL-NEXT: arg_u32 %arg_p2); + +; HSAIL: decl prog function &foo3(arg_u32 %ret)( +; HSAIL-NEXT: arg_u32 %arg_p0, +; HSAIL-NEXT: arg_u32 %x, +; HSAIL-NEXT: arg_u32 %arg_p2, +; HSAIL-NEXT: arg_u32 %y); + + +; HSAIL-LABEL: {{^}}prog function &foo0(arg_u32 %foo0)(arg_u32 %__arg_p0) +; HSAIL-NEXT: { +define i32 @foo0(i32) nounwind { + ret i32 %0 +} + +; HSAIL-LABEL: {{^}}prog function &foo1(arg_u32 %foo1)( +; HSAIL-NEXT: arg_u32 %__arg_p0, +; HSAIL-NEXT: arg_u32 %__arg_p1) +; HSAIL-NEXT: { +define i32 @foo1(i32, i32) nounwind { + ret i32 %1 +} + +; HSAIL-LABEL: {{^}}prog function &foo2(arg_u32 %foo2)( +; HSAIL-NEXT: arg_u32 %__arg_p0, +; HSAIL-NEXT: arg_u32 %x, +; HSAIL-NEXT: arg_u32 %__arg_p2) +; HSAIL-NEXT: { +define i32 @foo2(i32, i32 %x, i32) nounwind { + ret i32 %1 +} + +; HSAIL-LABEL: {{^}}prog function &foo3(arg_u32 %foo3)( +; HSAIL-NEXT: arg_u32 %__arg_p0, +; HSAIL-NEXT: arg_u32 %x, +; HSAIL-NEXT: arg_u32 %__arg_p2, +; HSAIL-NEXT: arg_u32 %y) +; HSAIL-NEXT: { +define i32 @foo3(i32, i32 %x, i32, i32 %y) nounwind { + ret i32 %1 +} Index: test/CodeGen/HSAIL/anyext.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/anyext.ll @@ -0,0 +1,17 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &anyext_i1_i32 +; HSAIL : cmp_eq_b1_s32 
{{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL : cvt_u32_b1 {{\$s[0-9]+}}, {{\$c[0-9]+}}; +; HSAIL : not_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL : and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1; +define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) { +entry: + %0 = icmp eq i32 %cond, 0 + %1 = zext i1 %0 to i8 + %2 = xor i8 %1, -1 + %3 = and i8 %2, 1 + %4 = zext i8 %3 to i32 + store i32 %4, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/atomic_cmp_swap_group.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/atomic_cmp_swap_group.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &lds_atomic_cmpxchg_ret_i32_offset( +; HSAIL: atomic_cas_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 7, {{\$s[0-9]+}}; +define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic + %result = extractvalue { i32, i1 } %pair, 0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &lds_atomic_cmpxchg_ret_i64_offset( +; HSAIL: atomic_cas_group_scar_wg_b64 {{\$d[0-9]+}}, [{{\$s[0-9]+}}+32], 7, {{\$d[0-9]+}}; +define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind { + %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 + %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic + %result = extractvalue { i64, i1 } %pair, 0 + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &lds_atomic_cmpxchg_noret_i32_offset( +; HSAIL: atomic_cas_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 7, {{\$s[0-9]+}}; +define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic + %result = extractvalue { i32, i1 } %pair, 0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &lds_atomic_cmpxchg_noret_i64_offset( +; HSAIL: atomic_cas_group_scar_wg_b64 {{\$d[0-9]+}}, [{{\$s[0-9]+}}+32], 7, {{\$d[0-9]+}}; +define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind { + %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 + %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic + %result = extractvalue { i64, i1 } %pair, 0 + ret void +} Index: test/CodeGen/HSAIL/basic-branch.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/basic-branch.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_branch( +; HSAIL: ld_arg_align(4)_u32 [[VAL:\$s[0-9]+]], [%val]; +; HSAIL: cmp_eq_b1_s32 [[CMP:\$c[0-9]+]], [[VAL]], 0; +; HSAIL: cbr_b1 [[CMP]], @BB0_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB0_2:}} +; HSAIL: ret; +define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind { + %cmp = icmp ne i32 %val, 0 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} Index: test/CodeGen/HSAIL/basic-loop.ll 
=================================================================== --- /dev/null +++ test/CodeGen/HSAIL/basic-loop.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_loop +; HSAIL: ld_arg +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1; +; HSAIL: ld_arg +; HSAIL: {{^@BB0_1:}} +; HSAIL: st_global_align(4)_u32 222, [ +; HSAIL: cmp_ne_b1_s32 $c0, {{\$s[0-9]+}}, 0; +; HSAIL-NEXT: cbr_b1 {{\$c[0-9]+}}, @BB0_1; +; HSAIL: }; +define void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind { +entry: + br label %loop.body + +loop.body: + %i = phi i32 [0, %entry], [%i.inc, %loop.body] + store i32 222, i32 addrspace(1)* %out + %cmp = icmp ne i32 %i, %val + %i.inc = add i32 %i, 1 + br i1 %cmp, label %loop.body, label %end + +end: + ret void +} Index: test/CodeGen/HSAIL/bitalign_opt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/bitalign_opt.ll @@ -0,0 +1,108 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &bitalign_pat0_0( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bytealign_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC0]], 2; +; HSAIL: ret; +define i32 @bitalign_pat0_0(i32 %src0) #0 { + %tmp0 = shl i32 %src0, 16 + %tmp1 = lshr i32 %src0, 16 + %or = or i32 %tmp0, %tmp1 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitalign_pat0_1( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC0]], 1; +; HSAIL: ret; +define i32 @bitalign_pat0_1(i32 %src0) #0 { + %tmp0 = shl i32 %src0, 31 + %tmp1 = lshr i32 %src0, 1 + %or = or i32 %tmp0, %tmp1 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitalign_pat0_2( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC0]], 31; +; HSAIL: ret; +define i32 @bitalign_pat0_2(i32 %src0) #0 { + %tmp0 = shl i32 %src0, 1 + %tmp1 = lshr i32 %src0, 31 + %or = or i32 %tmp0, %tmp1 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitalign_pat0_3( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC0]], 23; +; HSAIL: ret; +define i32 @bitalign_pat0_3(i32 %src0) #0 { + %tmp0 = shl i32 %src0, 9 + %tmp1 = lshr i32 %src0, 23 + %or = or i32 %tmp0, %tmp1 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitalign_pat0_4( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bytealign_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC0]], 3; +; HSAIL: ret; +define i32 @bitalign_pat0_4(i32 %src0) #0 { + %tmp0 = shl i32 %src0, 8 + %tmp1 = lshr i32 %src0, 24 + %or = or i32 %tmp0, %tmp1 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitalign_pat0_5( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bytealign_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC0]], 1; +; HSAIL: ret; +define i32 @bitalign_pat0_5(i32 %src0) #0 { + %tmp0 = shl i32 %src0, 24 + %tmp1 = lshr i32 %src0, 8 + %or = or i32 %tmp0, %tmp1 + ret i32 %or +} + +; FIXME: The pattern looks like it is supposed to eliminate the and of +; src1. 
+ +; HSAIL-LABEL: {{^}}prog function &bitalign_pat1( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: and_b32 [[AND_SRC1:\$s[0-9]+]], [[SRC1]], 31; +; HSAIL-DAG: neg_s32 [[NEG_SRC1:\$s[0-9]+]], [[AND_SRC1]]; +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC0]], [[NEG_SRC1]]; +; HSAIL: ret; +define i32 @bitalign_pat1(i32 %src0, i32 %src1) #0 { + %and31 = and i32 %src1, 31 + %shl = shl i32 %src0, %and31 + + %neg.src1 = sub i32 0, %src1 + %and.neg = and i32 %neg.src1, 31 + %srl = lshr i32 %src0, %and.neg + + %or = or i32 %shl, %srl + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitalign_pat2( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: neg_s32 [[NEG_SRC1:\$s[0-9]+]], [[SRC1]] +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC0]], [[NEG_SRC1]]; +; HSAIL: ret; +define i32 @bitalign_pat2(i32 %src0, i32 %src1) #0 { + %shl = shl i32 %src0, %src1 + + %and = and i32 %src1, 31 + %sub = sub i32 32, %and + %srl = lshr i32 %src0, %sub + + %or = or i32 %shl, %srl + ret i32 %or +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/bitcast.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/bitcast.ll @@ -0,0 +1,100 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) #0 { + %tmp0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)* + %tmp1 = load <16 x i8>, <16 x i8> addrspace(1)* %tmp0 + store <16 x i8> %tmp1, <16 x i8> addrspace(1)* %out + ret void +} + +define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) #0 { + %load = load float, float addrspace(1)* %in, align 4 + %bc = bitcast float %load to <2 x i16> + store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4 + ret void +} + +define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { + %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4 + %bc = bitcast <2 x i16> %load to float + store float %bc, float addrspace(1)* %out, align 4 + ret void +} + +define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 { + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4 + %bc = bitcast <4 x i8> %load to i32 + store i32 %bc, i32 addrspace(1)* %out, align 4 + ret void +} + +define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %load = load i32, i32 addrspace(1)* %in, align 4 + %bc = bitcast i32 %load to <4 x i8> + store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &bitcast_v2i32_to_f64( +define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8 + %add = add <2 x i32> %val, + %bc = bitcast <2 x i32> %add to double + store double %bc, double addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &bitcast_f64_to_v2i32( +define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) #0 { + %val = load double, double addrspace(1)* %in, align 8 + %add = fadd double %val, 4.0 + %bc = bitcast double %add to <2 x i32> + store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &bitcast_i64_to_f64( +; 
HSAIL: add_u64 [[RESULT:\$d[0-9]+]], +; HSAIL: st_global_align(8)_u64 [[RESULT]] +define void @bitcast_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) #0 { + %val = load i64, i64 addrspace(1)* %in, align 8 + %add = add i64 %val, 12345 + %bc = bitcast i64 %add to double + store double %bc, double addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &bitcast_f64_to_i64( +; HSAIL: add_f64 [[RESULT:\$d[0-9]+]], +; HSAIL: st_global_align(8)_f64 [[RESULT]] +define void @bitcast_f64_to_i64(i64 addrspace(1)* %out, double addrspace(1)* %in) #0 { + %val = load double, double addrspace(1)* %in, align 8 + %add = fadd double %val, 4.0 + %bc = bitcast double %add to i64 + store i64 %bc, i64 addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &bitcast_i32_to_f32( +; HSAIL: add_u32 [[RESULT:\$s[0-9]+]], +; HSAIL: st_global_align(4)_u32 [[RESULT]] +define void @bitcast_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 { + %val = load i32, i32 addrspace(1)* %in, align 4 + %add = add i32 %val, 12345 + %bc = bitcast i32 %add to float + store float %bc, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &bitcast_f32_to_i32( +; HSAIL: add_ftz_f32 [[RESULT:\$s[0-9]+]], +; HSAIL: st_global_align(4)_f32 [[RESULT]] +define void @bitcast_f32_to_i32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 { + %val = load float, float addrspace(1)* %in, align 4 + %add = fadd float %val, 4.0 + %bc = bitcast float %add to i32 + store i32 %bc, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/bitextract_opt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/bitextract_opt.ll @@ -0,0 +1,436 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rii_0( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], 11, 3; +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rii_0(i32 %src0) #0 { + %tmp0 = lshr i32 %src0, 11 + %tmp1 = and i32 %tmp0, 7 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rii_1( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-NEXT: shr_u32 [[RESULT:\$s[0-9]+]], [[SRC0]], 16 +; HSAIL-NEXT: st_arg_align(4)_u32 [[RESULT]] +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rii_1(i32 %src0) #0 { + %tmp0 = lshr i32 %src0, 16 + %tmp1 = and i32 %tmp0, 65535 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rii_2( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-NEXT: shr_u32 [[RESULT:\$s[0-9]+]], [[SRC0]], 31 +; HSAIL-NEXT: st_arg_align(4)_u32 [[RESULT]] +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rii_2(i32 %src0) #0 { + %tmp0 = lshr i32 %src0, 31 + %tmp1 = and i32 %tmp0, 7 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rii_3( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], 20, 1; +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rii_3(i32 %src0) #0 { + %tmp0 = lshr i32 %src0, 20 + %tmp1 = and i32 %tmp0, 1 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rii_4( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], 1, 5; +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rii_4(i32 %src0) #0 { + %tmp0 
= lshr i32 %src0, 1 + %tmp1 = and i32 %tmp0, 31 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rii_5( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], 27, 4; +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rii_5(i32 %src0) #0 { + %tmp0 = lshr i32 %src0, 27 + %tmp1 = and i32 %tmp0, 15 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rii_not_0( +; HSAIL-NOT: bitextract_u32 +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rii_not_0(i32 %src0) #0 { + %tmp0 = lshr i32 %src0, 11 + %tmp1 = and i32 %tmp0, 8 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rri_0( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], [[SRC1]], 3; +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rri_0(i32 %src0, i32 %src1) #0 { + %tmp0 = lshr i32 %src0, %src1 + %tmp1 = and i32 %tmp0, 7 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i32_rri_not_0( +; HSAIL-NOT: bitextract_u32 +; HSAIL: ret; +define i32 @bitextract_pat0_i32_rri_not_0(i32 %src0, i32 %src1) #0 { + %tmp0 = lshr i32 %src0, %src1 + %tmp1 = and i32 %tmp0, 8 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rii_0( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 11, 3; +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rii_0(i64 %src0) #0 { + %tmp0 = lshr i64 %src0, 11 + %tmp1 = and i64 %tmp0, 7 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rii_1( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 31, 16; +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rii_1(i64 %src0) #0 { + %tmp0 = lshr i64 %src0, 31 + %tmp1 = and i64 %tmp0, 65535 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rii_2( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 31, 32; +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rii_2(i64 %src0) #0 { + %tmp0 = lshr i64 %src0, 31 + %tmp1 = and i64 %tmp0, 4294967295 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rii_3( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 1, 32; +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rii_3(i64 %src0) #0 { + %tmp0 = lshr i64 %src0, 1 + %tmp1 = and i64 %tmp0, 4294967295 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rii_4( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 32, 3; +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rii_4(i64 %src0) #0 { + %tmp0 = lshr i64 %src0, 32 + %tmp1 = and i64 %tmp0, 7 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rii_5( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 33, 3; +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rii_5(i64 %src0) #0 { + %tmp0 = lshr i64 %src0, 33 + %tmp1 = and i64 %tmp0, 7 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rii_6( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL-NEXT: shr_u64 [[RESULT:\$d[0-9]+]], [[SRC0]], 63; +; HSAIL-NEXT: st_arg_align(8)_u64 [[RESULT]] +; HSAIL: ret; +define i64 
@bitextract_pat0_i64_rii_6(i64 %src0) #0 { + %tmp0 = lshr i64 %src0, 63 + %tmp1 = and i64 %tmp0, 7 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rii_not_0( +; HSAIL-NOT: bitextract_u64 +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rii_not_0(i64 %src0) #0 { + %tmp0 = lshr i64 %src0, 11 + %tmp1 = and i64 %tmp0, 8 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rri_0( +; HSAIL-DAG: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(8)_u64 [[SRC1:\$d[0-9]+]], [%src1]; +; HSAIL-DAG: cvt_u32_u64 [[TRUNCSRC1:\$s[0-9]+]], [[SRC1]]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], [[TRUNCSRC1]], 3; +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rri_0(i64 %src0, i64 %src1) #0 { + %tmp0 = lshr i64 %src0, %src1 + %tmp1 = and i64 %tmp0, 7 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_pat0_i64_rri_not_0( +; HSAIL-NOT: bitextract_u64 +; HSAIL: ret; +define i64 @bitextract_pat0_i64_rri_not_0(i64 %src0, i64 %src1) #0 { + %tmp0 = lshr i64 %src0, %src1 + %tmp1 = and i64 %tmp0, 8 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_0( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 11, 3; +define i32 @bitextract_signed_pat1_i32_rii_0(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 11 ; shift + %tmp1 = and i32 %tmp0, 7 ; mask + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_1( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-NEXT: shr_u32 [[RESULT:\$s[0-9]+]], [[SRC0]], 16 +; HSAIL-NEXT: st_arg_align(4)_u32 [[RESULT]] +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_1(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 16 + %tmp1 = and i32 %tmp0, 65535 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_2( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-NEXT: shr_s32 [[TMP:\$s[0-9]+]], [[SRC0]], 31 +; HSAIL-NEXT: and_b32 [[RESULT:\$s[0-9]+]], [[TMP]], 7; +; HSAIL-NEXT: st_arg_align(4)_u32 [[RESULT]] +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_2(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 31 + %tmp1 = and i32 %tmp0, 7 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_3( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], 20, 1; +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_3(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 20 + %tmp1 = and i32 %tmp0, 1 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_4( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], 1, 5; +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_4(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 1 + %tmp1 = and i32 %tmp0, 31 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_5( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], 27, 4; +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_5(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 27 + %tmp1 = and i32 %tmp0, 15 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_6( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, [[SRC0]], 27, 3; +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_6(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 27 + %tmp1 = and i32 %tmp0, 7 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog 
function &bitextract_signed_pat1_i32_rii_not_0( +; HSAIL-NOT: bitextract_u32 +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_not_0(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 11 + %tmp1 = and i32 %tmp0, 8 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rri_0( +; HSAIL: shr_s32 +; HSAIL: and_b32 +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rri_0(i32 %src0, i32 %src1) #0 { + %tmp0 = ashr i32 %src0, %src1 + %tmp1 = and i32 %tmp0, 7 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rri_not_0( +; HSAIL-NOT: bitextract_u32 +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rri_not_0(i32 %src0, i32 %src1) #0 { + %tmp0 = ashr i32 %src0, %src1 + %tmp1 = and i32 %tmp0, 8 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_not_1( +; HSAIL-NOT: bitextract_u32 +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_not_1(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 27 + %tmp1 = and i32 %tmp0, 8 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i32_rii_not_2( +; HSAIL-NOT: bitextract_u32 +; HSAIL: ret; +define i32 @bitextract_signed_pat1_i32_rii_not_2(i32 %src0) #0 { + %tmp0 = ashr i32 %src0, 27 + %tmp1 = and i32 %tmp0, 8 + ret i32 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_0( +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 11, 3; +define i64 @bitextract_signed_pat1_i64_rii_0(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 11 ; shift + %tmp1 = and i64 %tmp0, 7 ; mask + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_1( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL-NEXT: shr_u64 [[RESULT:\$d[0-9]+]], [[SRC0]], 32 +; HSAIL-NEXT: st_arg_align(8)_u64 [[RESULT]] +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_1(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 32 + %tmp1 = and i64 %tmp0, 4294967295 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_2( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL-NEXT: shr_s64 [[TMP:\$d[0-9]+]], [[SRC0]], 63 +; HSAIL-NEXT: and_b64 [[RESULT:\$d[0-9]+]], [[TMP]], 7; +; HSAIL-NEXT: st_arg_align(8)_u64 [[RESULT]] +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_2(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 63 + %tmp1 = and i64 %tmp0, 7 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_3( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 20, 1; +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_3(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 20 + %tmp1 = and i64 %tmp0, 1 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_4( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 1, 5; +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_4(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 1 + %tmp1 = and i64 %tmp0, 31 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_5( +; HSAIL: ld_arg_align(8)_u64 [[SRC0:\$d[0-9]+]], [%src0]; +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, [[SRC0]], 27, 4; +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_5(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 27 + %tmp1 = and i64 %tmp0, 15 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_6( +; HSAIL: bitextract_u64 
{{\$d[0-9]+}}, [[SRC0]], 27, 3; +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_6(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 27 + %tmp1 = and i64 %tmp0, 7 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_not_0( +; HSAIL-NOT: bitextract_u64 +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_not_0(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 11 + %tmp1 = and i64 %tmp0, 8 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rri_0( +; HSAIL: shr_s64 +; HSAIL: and_b64 +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rri_0(i64 %src0, i64 %src1) #0 { + %tmp0 = ashr i64 %src0, %src1 + %tmp1 = and i64 %tmp0, 7 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rri_not_0( +; HSAIL-NOT: bitextract_u64 +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rri_not_0(i64 %src0, i64 %src1) #0 { + %tmp0 = ashr i64 %src0, %src1 + %tmp1 = and i64 %tmp0, 8 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_not_1( +; HSAIL-NOT: bitextract_u64 +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_not_1(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 27 + %tmp1 = and i64 %tmp0, 8 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_not_2( +; HSAIL-NOT: bitextract_u64 +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_not_2(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 27 + %tmp1 = and i64 %tmp0, 8 + ret i64 %tmp1 +} + +; HSAIL-LABEL: {{^}}prog function &bitextract_signed_pat1_i64_rii_not_3( +; HSAIL-NOT: bitextract_u64 +; HSAIL: ret; +define i64 @bitextract_signed_pat1_i64_rii_not_3(i64 %src0) #0 { + %tmp0 = ashr i64 %src0, 59 + %tmp1 = and i64 %tmp0, 8 + ret i64 %tmp1 +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/bitselect_opt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/bitselect_opt.ll @@ -0,0 +1,335 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat0_rrr( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; + +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC1]], [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat0_rrr(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src0, %src1 + %tmp1 = xor i32 %src0, -1 + %tmp2 = and i32 %src2, %tmp1 + %or = or i32 %tmp0, %tmp2 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat0_irr( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, 17, [[SRC1]], [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat0_irr(i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src1, 17 + %tmp2 = and i32 %src2, -18 + %or = or i32 %tmp0, %tmp2 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat0_rir( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], 17, [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat0_rir(i32 %src0, i32 %src2) #0 { + %tmp0 = and i32 %src0, 17 + %tmp1 = xor i32 %src0, -1 + %tmp2 = and i32 %src2, %tmp1 + %or = or i32 %tmp0, %tmp2 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function 
&bitselect_pat0_rii( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], 11, 17; +; HSAIL: ret; +define i32 @bitselect_pat0_rii(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src0, 11 + %tmp1 = and i32 %src0, 17 + %tmp2 = xor i32 %tmp1, 17 + %or = or i32 %tmp0, %tmp2 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat1_rii( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], 17, 19; +; HSAIL: ret; +define i32 @bitselect_pat1_rii(i32 %src0, i32 %src2) #0 { + %tmp0 = and i32 %src0, 17 + %tmp1 = and i32 %src0, 19 + %tmp2 = xor i32 %tmp1, 19 + + %or = or i32 %tmp0, %tmp2 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_not_pat1_rii( +; HSAIL-NOT: bitselect_b32 +; HSAIL: ret; +define i32 @bitselect_not_pat1_rii(i32 %src0, i32 %src2) #0 { + %tmp0 = and i32 %src0, 17 + %tmp1 = and i32 %src0, 19 + %tmp2 = xor i32 %tmp1, 18 + + %or = or i32 %tmp0, %tmp2 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat1_rri( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC1]], 13; +; HSAIL: ret; +define i32 @bitselect_pat1_rri(i32 %src0, i32 %src1) #0 { + %tmp0 = and i32 %src0, %src1 + + %tmp1 = xor i32 %src0, -1 + %tmp2 = and i32 %tmp1, 13 + + %or = or i32 %tmp0, %tmp2 + ret i32 %or +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat2_rri( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC1]], 11; +; HSAIL: ret; +define i32 @bitselect_pat2_rri(i32 %src0, i32 %src1) #0 { + %tmp0 = and i32 %src0, %src1 + + %tmp1 = and i32 %src0, 11 + %tmp2 = xor i32 %tmp1, 11 + + %xor = xor i32 %tmp0, %tmp2 + ret i32 %xor +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_not_pat2_rri( +; HSAIL-NOT: bitselect_b32 +; HSAIL: ret; +define i32 @bitselect_not_pat2_rri(i32 %src0, i32 %src1) #0 { + %tmp0 = and i32 %src0, %src1 + + %tmp1 = and i32 %src0, 12 + %tmp2 = xor i32 %tmp1, 11 + + %xor = xor i32 %tmp0, %tmp2 + ret i32 %xor +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat3_rrr( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC1]], [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat3_rrr(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = xor i32 %src1, %src2 + %tmp1 = and i32 %src0, %tmp0 + %tmp2 = xor i32 %src2, %tmp1 + ret i32 %tmp2 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat3_irr( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, 7, [[SRC1]], [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat3_irr(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = xor i32 %src1, %src2 + %tmp1 = and i32 %tmp0, 7 + %tmp2 = xor i32 %src2, %tmp1 + ret i32 %tmp2 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat3_iir( +; HSAIL: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, 23, 7, [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat3_iir(i32 %src2) #0 { + %tmp0 = xor i32 7, %src2 + %tmp1 = and i32 23, %tmp0 + %tmp2 = xor i32 %src2, %tmp1 + ret i32 %tmp2 +} + +; 
HSAIL-LABEL: {{^}}prog function &bitselect_pat3_rir( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], 11, [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat3_rir(i32 %src0, i32 %src2) #0 { + %tmp0 = xor i32 11, %src2 + %tmp1 = and i32 %src0, %tmp0 + %tmp2 = xor i32 %src2, %tmp1 + ret i32 %tmp2 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat3_rri( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC1]], 23; +; HSAIL: ret; +define i32 @bitselect_pat3_rri(i32 %src0, i32 %src1) #0 { + %tmp0 = xor i32 %src1, 23 + %tmp1 = and i32 %src0, %tmp0 + %tmp2 = xor i32 23, %tmp1 + ret i32 %tmp2 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_not_pat3_rri( +; HSAIL-NOT: bitselect_b32 +; HSAIL: ret; +define i32 @bitselect_not_pat3_rri(i32 %src0, i32 %src1) #0 { + %tmp0 = xor i32 %src1, 23 + %tmp1 = and i32 %src0, %tmp0 + %tmp2 = xor i32 17, %tmp1 + ret i32 %tmp2 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat4_rrr( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC1]], [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat4_rrr(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src0, %src2 + %tmp1 = xor i32 %src2, %tmp0 + + %tmp2 = and i32 %src0, %src1 + + %tmp3 = xor i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +; FIXME: This should match, but permuting the operators prevents the +; match. + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat4_rrr_commute0( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL-NOT: bitselect_b32 +; HSAIL: ret; +define i32 @bitselect_pat4_rrr_commute0(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src0, %src2 + %tmp1 = xor i32 %tmp0, %src2 ; Commuting breaks match + + %tmp2 = and i32 %src0, %src1 + + %tmp3 = xor i32 %tmp2, %tmp1 + ret i32 %tmp3 +} + +; bitselect_pat4_rrr run through instcombine. 
+; FIXME: This should match bitselect +; HSAIL-LABEL: {{^}}prog function &bitselect_pat4_canonical_rrr( +; HSAIL-NOT: bitselect_b32 +; HSAIL: ret; +define i32 @bitselect_pat4_canonical_rrr(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = xor i32 %src0, -1 + %tmp1 = and i32 %tmp0, %src2 + %tmp2 = and i32 %src0, %src1 + %tmp3 = xor i32 %tmp2, %tmp1 + ret i32 %tmp3 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat4_irr( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, 9, [[SRC1]], [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat4_irr(i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 9, %src2 + %tmp1 = xor i32 %src2, %tmp0 + + %tmp2 = and i32 9, %src1 + + %tmp3 = xor i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_not_pat4_irr( +; HSAIL-NOT: bitselect_b32 +; HSAIL: ret; +define i32 @bitselect_not_pat4_irr(i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 9, %src2 + %tmp1 = xor i32 %src2, %tmp0 + + %tmp2 = and i32 11, %src1 + + %tmp3 = xor i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat4_iir( +; HSAIL: ld_arg_align(4)_u32 [[SRC2:\$s[0-9]+]], [%src2]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, 7, 5, [[SRC2]]; +; HSAIL: ret; +define i32 @bitselect_pat4_iir(i32 %src2) #0 { + %tmp0 = and i32 %src2, 7 + %tmp1 = xor i32 %src2, %tmp0 + %tmp2 = xor i32 %tmp1, 5 + ret i32 %tmp2 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat4_rri( +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[SRC1:\$s[0-9]+]], [%src1]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], [[SRC1]], 9; +; HSAIL: ret; +define i32 @bitselect_pat4_rri(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src0, 9 + %tmp1 = xor i32 %tmp0, 9 + + %tmp2 = and i32 %src0, %src1 + + %tmp3 = xor i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_not_pat4_rri( +; HSAIL-NOT: bitselect_b32 +; HSAIL: ret; +define i32 @bitselect_not_pat4_rri(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src0, 9 + %tmp1 = xor i32 %tmp0, 12 + + %tmp2 = and i32 %src0, %src1 + + %tmp3 = xor i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_pat4_rii( +; HSAIL: ld_arg_align(4)_u32 [[SRC0:\$s[0-9]+]], [%src0]; +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, [[SRC0]], 7, 9; +; HSAIL: ret; +define i32 @bitselect_pat4_rii(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src0, 9 + %tmp1 = xor i32 %tmp0, 9 + + %tmp2 = and i32 %src0, 7 + + %tmp3 = xor i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +; HSAIL-LABEL: {{^}}prog function &bitselect_not_pat4_rii( +; HSAIL-NOT: bitselect_b32 +; HSAIL: ret; +define i32 @bitselect_not_pat4_rii(i32 %src0, i32 %src1, i32 %src2) #0 { + %tmp0 = and i32 %src0, 10 + %tmp1 = xor i32 %tmp0, 9 + + %tmp2 = and i32 %src0, 7 + + %tmp3 = xor i32 %tmp1, %tmp2 + ret i32 %tmp3 +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/broken-chain-retval.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/broken-chain-retval.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; This should work correctly, but the chain is somehow messed up when +; this is a return value. This incorrectly selects if the intrinsic is +; marked readonly.
+ +declare i32 @llvm.HSAIL.activelaneid.u32() #0 + +; HSAIL-LABEL: {{^}}prog function &test_activelaneid_u32( +; HSAIL: activelaneid_u32 {{\$s[0-9]+}}; +define i32 @test_activelaneid_u32() #0 { + %tmp = call i32 @llvm.HSAIL.activelaneid.u32() #0 + ret i32 %tmp +} + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/build_vector.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/build_vector.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &build_vector2 +; HSAIL: ld_arg_align(4)_u32 [[PTR:\$s[0-9]+]] +; HSAIL-DAG: st_global_align(4)_u32 6, {{\[}}[[PTR]]+4]; +; HSAIL-DAG: st_global_align(8)_u32 5, {{\[}}[[PTR]]{{\]}}; +; HSAIL: ret; +define void @build_vector2 (<2 x i32> addrspace(1)* %out) { +entry: + store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &build_vector4 +; HSAIL: ld_arg_align(4)_u32 [[PTR:\$s[0-9]+]] +; HSAIL-DAG: st_global_align(4)_u32 8, {{\[}}[[PTR]]+12]; +; HSAIL-DAG: st_global_align(8)_u32 7, {{\[}}[[PTR]]+8]; +; HSAIL-DAG: st_global_align(4)_u32 6, {{\[}}[[PTR]]+4]; +; HSAIL-DAG: st_global_align(16)_u32 5, {{\[}}[[PTR]]{{\]}}; +; HSAIL: ret +define void @build_vector4 (<4 x i32> addrspace(1)* %out) { +entry: + store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/call.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/call.ll @@ -0,0 +1,365 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare void @extern.noret() #0 +declare i1 @extern.i1() #0 +declare i8 @extern.i8() #0 +declare i16 @extern.i16() #0 +declare i32 @extern.i32() #0 +declare i64 @extern.i64() #0 +declare void @extern.argi1(i1 %x) #0 +declare void @extern.argi8(i8 %x) #0 +declare void @extern.argi16(i16 %x) #0 +declare void @extern.argi32(i32 %x) #0 +declare void @extern.argf32(float %x) #0 +declare void @extern.argi64(i64 %x) #0 + +; HSAIL-LABEL: {{^}}decl prog function &extern.argv4i32()(align(16) arg_u32 %arg_p0[4]); +declare void @extern.argv4i32(<4 x i32>) #0 +declare void @extern.argv3i32(<3 x i32>) #0 +declare void @extern.argv1i32(<1 x i32>) #0 + +; HSAIL: {{^}}decl prog function &extern.argv4i8()(align(4) arg_u8 %arg_p0[4]); +declare void @extern.argv4i8(<4 x i8>) #0 + +; HSAIL: {{^}}decl prog function &extern.arg.array.8xi32()(arg_u32 %arg_p0[8]); +declare void @extern.arg.array.8xi32([8 x i32]) #0 + +; HSAIL: {{^}}decl prog function &argument.name.is.shadowed()(arg_u32 %is.shadowed); +declare void @argument.name.is.shadowed(i32 %is.shadowed) #0 + +; HSAIL: {{^}}decl prog function &argument.name.is.shadowed.vector()(align(8) arg_u32 %is.shadowed[2]); +declare void @argument.name.is.shadowed.vector(<2 x i32> %is.shadowed) #0 + + +; HSAIL: {{^}}decl prog function &two_args()( +; HSAIL-NEXT: arg_u32 %a, +; HSAIL-NEXT: arg_f64 %b); +declare void @two_args(i32 %a, double %b) #0 + +; HSAIL: {{^}}decl prog function &two_anon_args()( +; HSAIL-NEXT: arg_f64 %arg_p0, +; HSAIL-NEXT: arg_u32 %arg_p1); +declare void @two_anon_args(double, i32) #0 + +; HSAIL-LABEL{{^}}prog function &test_void_call_no_args( +; HSAIL: { +; HSAIL: call &extern.noret () (); +; HSAIL: } +define void @test_void_call_no_args() #0 { + call void @extern.noret() #0 + ret void +} + +; HSAIL-LABEL{{^}}prog function &test_call_no_args_ret_i32( +; HSAIL: { +; HSAIL: { +; HSAIL-NEXT: {{^[ \t]}}arg_u32
%extern.i32;
+; HSAIL-NEXT: call &extern.i32 (%extern.i32) ();
+; HSAIL-NEXT: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%extern.i32];
+; HSAIL-NEXT: }
+; HSAIL: }
+define i32 @test_call_no_args_ret_i32() #0 {
+ %ret = call i32 @extern.i32() #0
+ ret i32 %ret
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_no_args_ret_i64(
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u64 %extern.i64;
+; HSAIL-NEXT: call &extern.i64 (%extern.i64) ();
+; HSAIL-NEXT: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%extern.i64];
+; HSAIL-NEXT: }
+; HSAIL: }
+define i64 @test_call_no_args_ret_i64() #0 {
+ %ret = call i64 @extern.i64() #0
+ ret i64 %ret
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_no_args_ret_i1(
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u8 %extern.i1;
+; HSAIL-NEXT: call &extern.i1 (%extern.i1) ();
+; HSAIL-NEXT: ld_arg_u8 {{\$s[0-9]+}}, [%extern.i1];
+; HSAIL-NEXT: }
+; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1;
+; HSAIL: }
+define i1 @test_call_no_args_ret_i1() #0 {
+ %ret = call i1 @extern.i1() #0
+ ret i1 %ret
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_no_args_ret_i8(
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u8 %extern.i8;
+; HSAIL-NEXT: call &extern.i8 (%extern.i8) ();
+; HSAIL-NEXT: ld_arg_u8 {{\$s[0-9]+}}, [%extern.i8];
+; HSAIL-NEXT: }
+; HSAIL: }
+define i8 @test_call_no_args_ret_i8() #0 {
+ %ret = call i8 @extern.i8() #0
+ ret i8 %ret
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_no_args_ret_i16(
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u16 %extern.i16;
+; HSAIL-NEXT: call &extern.i16 (%extern.i16) ();
+; HSAIL-NEXT: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%extern.i16];
+; HSAIL-NEXT: }
+; HSAIL: }
+define i16 @test_call_no_args_ret_i16() #0 {
+ %ret = call i16 @extern.i16() #0
+ ret i16 %ret
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_i32_arg(
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u32 %x;
+; HSAIL-NEXT: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%x];
+; HSAIL-NEXT: call &extern.argi32 () (%x);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_i32_arg(i32 %x) #0 {
+ call void @extern.argi32(i32 %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_f32_arg(
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_f32 %x;
+; HSAIL-NEXT: st_arg_align(4)_f32 {{\$s[0-9]+}}, [%x];
+; HSAIL-NEXT: call &extern.argf32 () (%x);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_f32_arg(float %x) #0 {
+ call void @extern.argf32(float %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_i64_arg()(arg_u64 %x)
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u64 %x;
+; HSAIL-NEXT: st_arg_align(8)_u64 {{\$d[0-9]+}}, [%x];
+; HSAIL-NEXT: call &extern.argi64 () (%x);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_i64_arg(i64 %x) #0 {
+ call void @extern.argi64(i64 %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_i8_arg()(arg_u8 %x)
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u8 %x;
+; HSAIL-NEXT: st_arg_u8 {{\$s[0-9]+}}, [%x];
+; HSAIL-NEXT: call &extern.argi8 () (%x);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_i8_arg(i8 %x) #0 {
+ call void @extern.argi8(i8 %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_i16_arg()(arg_u16 %x)
+; HSAIL: {
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u16 %x;
+; HSAIL-NEXT: st_arg_align(2)_u16 {{\$s[0-9]+}}, [%x];
+; HSAIL-NEXT: call &extern.argi16 () (%x);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_i16_arg(i16 %x) #0 {
+ call void @extern.argi16(i16 %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_i1_arg()(arg_u8 %x)
+; HSAIL: {
+; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1;
+; HSAIL: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u8 %x;
+; HSAIL-NEXT: st_arg_u8 {{\$s[0-9]+}}, [%x];
+; HSAIL-NEXT: call &extern.argi1 () (%x);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_i1_arg(i1 %x) #0 {
+ call void @extern.argi1(i1 %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_v4i32_arg()(align(16) arg_u32 %x[4])
+; HSAIL: {
+; HSAIL: ld_arg_align(16)_u32 [[LDELT0:\$s[0-9]+]], [%x];
+; HSAIL: ld_arg_align(4)_u32 [[LDELT1:\$s[0-9]+]], [%x][4];
+; HSAIL: ld_arg_align(8)_u32 [[LDELT2:\$s[0-9]+]], [%x][8];
+; HSAIL: ld_arg_align(4)_u32 [[LDELT3:\$s[0-9]+]], [%x][12];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: align(16) arg_u32 %__param_p0[4];
+; HSAIL-NEXT: st_arg_align(16)_u32 [[LDELT0]], [%__param_p0];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[LDELT1]], [%__param_p0][4];
+; HSAIL-NEXT: st_arg_align(8)_u32 [[LDELT2]], [%__param_p0][8];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[LDELT3]], [%__param_p0][12];
+; HSAIL-NEXT: call &extern.argv4i32 () (%__param_p0);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_v4i32_arg(<4 x i32> %x) #0 {
+ call void @extern.argv4i32(<4 x i32> %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_v3i32_arg()(align(16) arg_u32 %x[4])
+; HSAIL: {
+; HSAIL: ld_arg_align(16)_u32 [[LDELT0:\$s[0-9]+]], [%x];
+; HSAIL: ld_arg_align(4)_u32 [[LDELT1:\$s[0-9]+]], [%x][4];
+; HSAIL: ld_arg_align(8)_u32 [[LDELT2:\$s[0-9]+]], [%x][8];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: align(16) arg_u32 %__param_p0[4];
+; HSAIL-NEXT: st_arg_align(16)_u32 [[LDELT0]], [%__param_p0];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[LDELT1]], [%__param_p0][4];
+; HSAIL-NEXT: st_arg_align(8)_u32 [[LDELT2]], [%__param_p0][8];
+; HSAIL-NEXT: call &extern.argv3i32 () (%__param_p0);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_v3i32_arg(<3 x i32> %x) #0 {
+ call void @extern.argv3i32(<3 x i32> %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_v1i32_arg()(arg_u32 %x[1])
+; HSAIL: {
+; HSAIL: ld_arg_align(4)_u32 [[LDARG:\$s[0-9]+]], [%x];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u32 %__param_p0[1];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[LDARG]], [%__param_p0];
+; HSAIL-NEXT: call &extern.argv1i32 () (%__param_p0);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_v1i32_arg(<1 x i32> %x) #0 {
+ call void @extern.argv1i32(<1 x i32> %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_v4i8_arg()(align(4) arg_u8 %x[4])
+; HSAIL: {
+; HSAIL: ld_arg_align(4)_u8 [[LDELT0:\$s[0-9]+]], [%x];
+; HSAIL: ld_arg_u8 [[LDELT1:\$s[0-9]+]], [%x][1];
+; HSAIL: ld_arg_align(2)_u8 [[LDELT2:\$s[0-9]+]], [%x][2];
+; HSAIL: ld_arg_u8 [[LDELT3:\$s[0-9]+]], [%x][3];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: align(4) arg_u8 %__param_p0[4];
+; HSAIL-NEXT: st_arg_align(4)_u8 [[LDELT0]], [%__param_p0];
+; HSAIL-NEXT: st_arg_u8 [[LDELT1]], [%__param_p0][1];
+; HSAIL-NEXT: st_arg_align(2)_u8 [[LDELT2]], [%__param_p0][2];
+; HSAIL-NEXT: st_arg_u8 [[LDELT3]], [%__param_p0][3];
+; HSAIL-NEXT: call &extern.argv4i8 () (%__param_p0);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_v4i8_arg(<4 x i8> %x) #0 {
+ call void @extern.argv4i8(<4 x i8> %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_array_8xi32_arg()(arg_u32 %x[8])
+; HSAIL: ld_arg_align(4)_u32 [[ELT0:\$s[0-9]+]], [%x];
+; HSAIL-NEXT: ld_arg_align(4)_u32 [[ELT1:\$s[0-9]+]], [%x][4];
+; HSAIL-NEXT: ld_arg_align(4)_u32 [[ELT2:\$s[0-9]+]], [%x][8];
+; HSAIL-NEXT: ld_arg_align(4)_u32 [[ELT3:\$s[0-9]+]], [%x][12];
+; HSAIL-NEXT: ld_arg_align(4)_u32 [[ELT4:\$s[0-9]+]], [%x][16];
+; HSAIL-NEXT: ld_arg_align(4)_u32 [[ELT5:\$s[0-9]+]], [%x][20];
+; HSAIL-NEXT: ld_arg_align(4)_u32 [[ELT6:\$s[0-9]+]], [%x][24];
+; HSAIL-NEXT: ld_arg_align(4)_u32 [[ELT7:\$s[0-9]+]], [%x][28];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u32 %__param_p0[8];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[ELT0]], [%__param_p0];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[ELT1]], [%__param_p0][4];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[ELT2]], [%__param_p0][8];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[ELT3]], [%__param_p0][12];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[ELT4]], [%__param_p0][16];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[ELT5]], [%__param_p0][20];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[ELT6]], [%__param_p0][24];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[ELT7]], [%__param_p0][28];
+; HSAIL-NEXT: call &extern.arg.array.8xi32 () (%__param_p0);
+; HSAIL-NEXT: }
+; HSAIL-NEXT: ret;
+define void @test_call_array_8xi32_arg([8 x i32] %x) #0 {
+ call void @extern.arg.array.8xi32([8 x i32] %x) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_shadow_argument_name()(arg_u32 %is.shadowed)
+; HSAIL: {
+; HSAIL: ld_arg_align(4)_u32 [[LDARG:\$s[0-9]+]], [%is.shadowed];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u32 %is.shadowed;
+; HSAIL-NEXT: st_arg_align(4)_u32 [[LDARG]], [%is.shadowed];
+; HSAIL-NEXT: call &argument.name.is.shadowed () (%is.shadowed);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_shadow_argument_name(i32 %is.shadowed) #0 {
+ call void @argument.name.is.shadowed(i32 %is.shadowed) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_shadow_argument_name_vector()(arg_u32 %is.shadowed[2])
+; HSAIL: {
+; HSAIL: ld_arg_align(8)_u32 [[LDELT0:\$s[0-9]+]], [%is.shadowed];
+; HSAIL: ld_arg_align(4)_u32 [[LDELT1:\$s[0-9]+]], [%is.shadowed][4];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: align(8) arg_u32 %is.shadowed[2];
+; HSAIL-NEXT: st_arg_align(8)_u32 [[LDELT0]], [%is.shadowed];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[LDELT1]], [%is.shadowed][4];
+; HSAIL-NEXT: call &argument.name.is.shadowed.vector () (%is.shadowed);
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_shadow_argument_name_vector(<2 x i32> %is.shadowed) #0 {
+ call void @argument.name.is.shadowed.vector(<2 x i32> %is.shadowed) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_two_args()(
+; HSAIL-NEXT: arg_u32 %x,
+; HSAIL-NEXT: arg_f64 %y)
+; HSAIL-NEXT: {
+; HSAIL: ld_arg_align(4)_u32 [[LDX:\$s[0-9]+]], [%x];
+; HSAIL-NEXT: ld_arg_align(8)_f64 [[LDY:\$d[0-9]+]], [%y];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: {{^[ \t]}}arg_u32 %a;
+; HSAIL-NEXT: {{^[ \t]}}arg_f64 %b;
+; HSAIL-NEXT: st_arg_align(4)_u32 [[LDX]], [%a];
+; HSAIL-NEXT: st_arg_align(8)_f64 [[LDY]], [%b];
+; HSAIL-NEXT: call &two_args () (%a, %b);{{$}}
+; HSAIL-NEXT: }
+; HSAIL: }
+define void @test_call_two_args(i32 %x, double %y) #0 {
+ call void @two_args(i32 %x, double %y) #0
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_call_two_anon_args()(
+; HSAIL-NEXT: arg_f64 %x,
+; HSAIL-NEXT: arg_u32 %y)
+; HSAIL-NEXT: {
+; HSAIL: ld_arg_align(8)_f64 [[LDX:\$d[0-9]+]], [%x];
+; HSAIL-NEXT: ld_arg_align(4)_u32 [[LDY:\$s[0-9]+]], [%y];
+; HSAIL-NEXT: {
+; HSAIL-NEXT: {{^[ \t]}}arg_f64 %__param_p0;
+; HSAIL-NEXT: {{^[ \t]}}arg_u32 %__param_p1;
+; HSAIL-NEXT: st_arg_align(8)_f64 [[LDX]], [%__param_p0];
+; HSAIL-NEXT: st_arg_align(4)_u32 [[LDY]], [%__param_p1];
+; HSAIL-NEXT: call &two_anon_args () (%__param_p0, %__param_p1);{{$}}
+; HSAIL-NEXT: }
+; HSAIL: }
+define void
@test_call_two_anon_args(double %x, i32 %y) #0 { + call void @two_anon_args(double %x, i32 %y) #0 + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/conversion_intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/conversion_intrinsics.ll @@ -0,0 +1,543 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; float to int + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_neari_f32( +; HSAIL: cvt_ftz_neari_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_s32_neari_f32(float %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.s32.neari.f32(float %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_downi_f32( +; HSAIL: cvt_ftz_downi_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_s32_downi_f32(float %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.s32.downi.f32(float %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_upi_f32( +; HSAIL: cvt_ftz_upi_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_s32_upi_f32(float %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.s32.upi.f32(float %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_zeroi_f32( +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_s32_zeroi_f32(float %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.s32.zeroi.f32(float %x) #0 + ret i32 %y +} + +; float to unsigned int + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u32_neari_f32( +; HSAIL: cvt_ftz_neari_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_u32_neari_f32(float %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.u32.neari.f32(float %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u32_downi_f32( +; HSAIL: cvt_ftz_downi_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_u32_downi_f32(float %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.u32.downi.f32(float %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u32_upi_f32( +; HSAIL: cvt_ftz_upi_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_u32_upi_f32(float %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.u32.upi.f32(float %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u32_zeroi_f32( +; HSAIL: cvt_ftz_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_u32_zeroi_f32(float %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.u32.zeroi.f32(float %x) #0 + ret i32 %y +} + + + ; float to long + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s64_neari_f32( +; HSAIL: cvt_ftz_neari_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}} +define i64 @test_cvt_s64_neari_f32(float %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.s64.neari.f32(float %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s64_downi_f32( +; HSAIL: cvt_ftz_downi_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}} +define i64 @test_cvt_s64_downi_f32(float %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.s64.downi.f32(float %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s64_upi_f32( +; HSAIL: cvt_ftz_upi_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}} +define i64 @test_cvt_s64_upi_f32(float %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.s64.upi.f32(float %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s64_zeroi_f32( +; HSAIL: cvt_ftz_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}} +define i64 @test_cvt_s64_zeroi_f32(float %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.s64.zeroi.f32(float %x) #0 + ret i64 %y +} + +; float to unsigned long + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u64_neari_f32( +; HSAIL: cvt_ftz_neari_u64_f32 
{{\$d[0-9]+}}, {{\$s[0-9]+}} +define i64 @test_cvt_u64_neari_f32(float %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.u64.neari.f32(float %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u64_downi_f32( +; HSAIL: cvt_ftz_downi_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}} +define i64 @test_cvt_u64_downi_f32(float %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.u64.downi.f32(float %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u64_upi_f32( +; HSAIL: cvt_ftz_upi_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}} +define i64 @test_cvt_u64_upi_f32(float %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.u64.upi.f32(float %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u64_zeroi_f32( +; HSAIL: cvt_ftz_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}} +define i64 @test_cvt_u64_zeroi_f32(float %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.u64.zeroi.f32(float %x) #0 + ret i64 %y +} + +; double to int + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_neari_f64( +; HSAIL: cvt_neari_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_s32_neari_f64(double %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.s32.neari.f64(double %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_downi_f64( +; HSAIL: cvt_downi_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_s32_downi_f64(double %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.s32.downi.f64(double %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_upi_f64( +; HSAIL: cvt_upi_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_s32_upi_f64(double %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.s32.upi.f64(double %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_zeroi_f64( +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_s32_zeroi_f64(double %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.s32.zeroi.f64(double %x) #0 + ret i32 %y +} + + +; double to unsigned int + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u32_neari_f64( +; HSAIL: cvt_neari_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_u32_neari_f64(double %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.u32.neari.f64(double %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u32_downi_f64( +; HSAIL: cvt_downi_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_u32_downi_f64(double %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.u32.downi.f64(double %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u32_upi_f64( +; HSAIL: cvt_upi_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_u32_upi_f64(double %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.u32.upi.f64(double %x) #0 + ret i32 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u32_zeroi_f64( +; HSAIL: cvt_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_u32_zeroi_f64(double %x) #1 { + %y = call i32 @llvm.HSAIL.cvt.u32.zeroi.f64(double %x) #0 + ret i32 %y +} + +; double to long + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s64_neari_f64( +; HSAIL: cvt_neari_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define i64 @test_cvt_s64_neari_f64(double %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.s64.neari.f64(double %x) #0 + ret i64 %y + +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s64_downi_f64( +; HSAIL: cvt_downi_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define i64 @test_cvt_s64_downi_f64(double %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.s64.downi.f64(double %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s64_upi_f64( +; HSAIL: cvt_upi_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define i64 
@test_cvt_s64_upi_f64(double %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.s64.upi.f64(double %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s64_zeroi_f64( +; HSAIL: cvt_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define i64 @test_cvt_s64_zeroi_f64(double %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.s64.zeroi.f64(double %x) #0 + ret i64 %y +} + +; double to unsigned long + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u64_neari_f64( +; HSAIL: cvt_neari_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define i64 @test_cvt_u64_neari_f64(double %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.u64.neari.f64(double %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u64_downi_f64( +; HSAIL: cvt_downi_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define i64 @test_cvt_u64_downi_f64(double %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.u64.downi.f64(double %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u64_upi_f64( +; HSAIL: cvt_upi_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define i64 @test_cvt_u64_upi_f64(double %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.u64.upi.f64(double %x) #0 + ret i64 %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_u64_zeroi_f64( +; HSAIL: cvt_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define i64 @test_cvt_u64_zeroi_f64(double %x) #1 { + %y = call i64 @llvm.HSAIL.cvt.u64.zeroi.f64(double %x) #0 + ret i64 %y +} + +; half to float + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_f16( +; HSAIL: cvt_f32_f16 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_f32_f16(i32 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.f16(i32 %x) #0 + ret float %y +} + +; float to half + +; HSAIL-LABEL: {{^}}prog function &test_cvt_zero_f16_f32( +; HSAIL: cvt_zero_f16_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_zero_f16_f32(float %x) #1 { + %y = call float @llvm.HSAIL.cvt.zero.f16.f32(float %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_near_f16_f32( +; HSAIL: cvt_f16_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_near_f16_f32(float %x) #1 { + %y = call float @llvm.HSAIL.cvt.near.f16.f32(float %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_down_f16_f32( +; HSAIL: cvt_down_f16_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_down_f16_f32(float %x) #1 { + %y = call float @llvm.HSAIL.cvt.down.f16.f32(float %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_up_f16_f32( +; HSAIL: cvt_up_f16_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_up_f16_f32(float %x) #1 { + %y = call float @llvm.HSAIL.cvt.up.f16.f32(float %x) #0 + ret float %y +} + +; double to half + +; HSAIL-LABEL: {{^}}prog function &test_cvt_zero_f16_f64( +; HSAIL: cvt_zero_f16_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_zero_f16_f64(double %x) #1 { + %y = call float @llvm.HSAIL.cvt.zero.f16.f64(double %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_near_f16_f64( +; HSAIL: cvt_f16_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_near_f16_f64(double %x) #1 { + %y = call float @llvm.HSAIL.cvt.near.f16.f64(double %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_down_f16_f64( +; HSAIL: cvt_down_f16_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_down_f16_f64(double %x) #1 { + %y = call float @llvm.HSAIL.cvt.down.f16.f64(double %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_up_f16_f64( +; HSAIL: cvt_up_f16_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_up_f16_f64(double %x) #1 { + %y = call float 
@llvm.HSAIL.cvt.up.f16.f64(double %x) #0 + ret float %y +} + + +; int to float + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_down_i32( +; HSAIL: cvt_down_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_f32_down_i32(i32 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.down.i32(i32 %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_up_i32( +; HSAIL: cvt_up_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_f32_up_i32(i32 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.up.i32(i32 %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_zero_i32( +; HSAIL: cvt_zero_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_f32_zero_i32(i32 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.zero.i32(i32 %x) #0 + ret float %y +} + + +; unsigned int to float + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_down_u32( +; HSAIL: cvt_down_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_f32_down_u32(i32 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.down.u32(i32 %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_up_u32( +; HSAIL: cvt_up_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_f32_up_u32(i32 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.up.u32(i32 %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_zero_u32( +; HSAIL: cvt_zero_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define float @test_cvt_f32_zero_u32(i32 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.zero.u32(i32 %x) #0 + ret float %y +} + + +; long to float + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_down_i64( +; HSAIL: cvt_down_f32_s64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_down_i64(i64 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.down.i64(i64 %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_up_i64( +; HSAIL: cvt_up_f32_s64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_up_i64(i64 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.up.i64(i64 %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_zero_i64( +; HSAIL: cvt_zero_f32_s64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_zero_i64(i64 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.zero.i64(i64 %x) #0 + ret float %y +} + +; unsigned long to float + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_down_u64( +; HSAIL: cvt_down_f32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_down_u64(i64 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.down.u64(i64 %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_up_u64( +; HSAIL: cvt_up_f32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_up_u64(i64 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.up.u64(i64 %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_zero_u64( +; HSAIL: cvt_zero_f32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_zero_u64(i64 %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.zero.u64(i64 %x) #0 + ret float %y +} + +; long to double + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f64_down_i64( +; HSAIL: cvt_down_f64_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define double @test_cvt_f64_down_i64(i64 %x) #1 { + %y = call double @llvm.HSAIL.cvt.f64.down.i64(i64 %x) #0 + ret double %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f64_up_i64( +; HSAIL: cvt_up_f64_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define double @test_cvt_f64_up_i64(i64 %x) #1 { + %y = call double @llvm.HSAIL.cvt.f64.up.i64(i64 %x) #0 + ret double %y +} + +; 
HSAIL-LABEL: {{^}}prog function &test_cvt_f64_zero_i64( +; HSAIL: cvt_zero_f64_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define double @test_cvt_f64_zero_i64(i64 %x) #1 { + %y = call double @llvm.HSAIL.cvt.f64.zero.i64(i64 %x) #0 + ret double %y +} + +; unsigned long to double +; HSAIL-LABEL: {{^}}prog function &test_cvt_f64_down_u64( +; HSAIL: cvt_down_f64_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define double @test_cvt_f64_down_u64(i64 %x) #1 { + %y = call double @llvm.HSAIL.cvt.f64.down.u64(i64 %x) #0 + ret double %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f64_up_u64( +; HSAIL: cvt_up_f64_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define double @test_cvt_f64_up_u64(i64 %x) #1 { + %y = call double @llvm.HSAIL.cvt.f64.up.u64(i64 %x) #0 + ret double %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f64_zero_u64( +; HSAIL: cvt_zero_f64_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define double @test_cvt_f64_zero_u64(i64 %x) #1 { + %y = call double @llvm.HSAIL.cvt.f64.zero.u64(i64 %x) #0 + ret double %y +} + + +; double to float + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_down_f64( +; HSAIL: cvt_ftz_down_f32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_down_f64(double %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.down.f64(double %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_up_f64( +; HSAIL: cvt_ftz_up_f32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_up_f64(double %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.up.f64(double %x) #0 + ret float %y +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_f32_zero_f64( +; HSAIL: cvt_ftz_zero_f32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define float @test_cvt_f32_zero_f64(double %x) #1 { + %y = call float @llvm.HSAIL.cvt.f32.zero.f64(double %x) #0 + ret float %y +} + +declare i32 @llvm.HSAIL.cvt.s32.neari.f32(float) #0 +declare i32 @llvm.HSAIL.cvt.s32.downi.f32(float) #0 +declare i32 @llvm.HSAIL.cvt.s32.upi.f32(float) #0 +declare i32 @llvm.HSAIL.cvt.s32.zeroi.f32(float) #0 +declare i32 @llvm.HSAIL.cvt.u32.neari.f32(float) #0 +declare i32 @llvm.HSAIL.cvt.u32.downi.f32(float) #0 +declare i32 @llvm.HSAIL.cvt.u32.upi.f32(float) #0 +declare i32 @llvm.HSAIL.cvt.u32.zeroi.f32(float) #0 +declare i64 @llvm.HSAIL.cvt.s64.neari.f32(float) #0 +declare i64 @llvm.HSAIL.cvt.s64.downi.f32(float) #0 +declare i64 @llvm.HSAIL.cvt.s64.upi.f32(float) #0 +declare i64 @llvm.HSAIL.cvt.s64.zeroi.f32(float) #0 +declare i64 @llvm.HSAIL.cvt.u64.neari.f32(float) #0 +declare i64 @llvm.HSAIL.cvt.u64.downi.f32(float) #0 +declare i64 @llvm.HSAIL.cvt.u64.upi.f32(float) #0 +declare i64 @llvm.HSAIL.cvt.u64.zeroi.f32(float) #0 +declare i32 @llvm.HSAIL.cvt.s32.neari.f64(double) #0 +declare i32 @llvm.HSAIL.cvt.s32.downi.f64(double) #0 +declare i32 @llvm.HSAIL.cvt.s32.upi.f64(double) #0 +declare i32 @llvm.HSAIL.cvt.s32.zeroi.f64(double) #0 +declare i32 @llvm.HSAIL.cvt.u32.neari.f64(double) #0 +declare i32 @llvm.HSAIL.cvt.u32.downi.f64(double) #0 +declare i32 @llvm.HSAIL.cvt.u32.upi.f64(double) #0 +declare i32 @llvm.HSAIL.cvt.u32.zeroi.f64(double) #0 +declare i64 @llvm.HSAIL.cvt.s64.neari.f64(double) #0 +declare i64 @llvm.HSAIL.cvt.s64.downi.f64(double) #0 +declare i64 @llvm.HSAIL.cvt.s64.upi.f64(double) #0 +declare i64 @llvm.HSAIL.cvt.s64.zeroi.f64(double) #0 +declare i64 @llvm.HSAIL.cvt.u64.neari.f64(double) #0 +declare i64 @llvm.HSAIL.cvt.u64.downi.f64(double) #0 +declare i64 @llvm.HSAIL.cvt.u64.upi.f64(double) #0 +declare i64 @llvm.HSAIL.cvt.u64.zeroi.f64(double) #0 +declare float @llvm.HSAIL.cvt.f32.f16(i32) #0 +declare float 
@llvm.HSAIL.cvt.zero.f16.f32(float) #0 +declare float @llvm.HSAIL.cvt.near.f16.f32(float) #0 +declare float @llvm.HSAIL.cvt.down.f16.f32(float) #0 +declare float @llvm.HSAIL.cvt.up.f16.f32(float) #0 +declare float @llvm.HSAIL.cvt.zero.f16.f64(double) #0 +declare float @llvm.HSAIL.cvt.near.f16.f64(double) #0 +declare float @llvm.HSAIL.cvt.down.f16.f64(double) #0 +declare float @llvm.HSAIL.cvt.up.f16.f64(double) #0 +declare float @llvm.HSAIL.cvt.f32.down.i32(i32) #0 +declare float @llvm.HSAIL.cvt.f32.up.i32(i32) #0 +declare float @llvm.HSAIL.cvt.f32.zero.i32(i32) #0 +declare float @llvm.HSAIL.cvt.f32.down.u32(i32) #0 +declare float @llvm.HSAIL.cvt.f32.up.u32(i32) #0 +declare float @llvm.HSAIL.cvt.f32.zero.u32(i32) #0 +declare float @llvm.HSAIL.cvt.f32.down.i64(i64) #0 +declare float @llvm.HSAIL.cvt.f32.up.i64(i64) #0 +declare float @llvm.HSAIL.cvt.f32.zero.i64(i64) #0 +declare float @llvm.HSAIL.cvt.f32.down.u64(i64) #0 +declare float @llvm.HSAIL.cvt.f32.up.u64(i64) #0 +declare float @llvm.HSAIL.cvt.f32.zero.u64(i64) #0 +declare double @llvm.HSAIL.cvt.f64.down.i64(i64) #0 +declare double @llvm.HSAIL.cvt.f64.up.i64(i64) #0 +declare double @llvm.HSAIL.cvt.f64.zero.i64(i64) #0 +declare double @llvm.HSAIL.cvt.f64.down.u64(i64) #0 +declare double @llvm.HSAIL.cvt.f64.up.u64(i64) #0 +declare double @llvm.HSAIL.cvt.f64.zero.u64(i64) #0 +declare float @llvm.HSAIL.cvt.f32.down.f64(double) #0 +declare float @llvm.HSAIL.cvt.f32.up.f64(double) #0 +declare float @llvm.HSAIL.cvt.f32.zero.f64(double) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/conversion_patterns.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/conversion_patterns.ll @@ -0,0 +1,90 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_neari_f32_pat( +; HSAIL: cvt_ftz_neari_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +; HSAIL-NEXT: st_arg_align(4)_u32 +; HSAIL-NEXT: ret +define i32 @test_cvt_s32_neari_f32_pat(float %x) #1 { + %round = call float @llvm.rint.f32(float %x) #0 + %cvt = fptosi float %round to i32 + ret i32 %cvt +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_neari_f64_pat( +; HSAIL: cvt_neari_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +; HSAIL-NEXT: st_arg_align(4)_u32 +; HSAIL-NEXT: ret +define i32 @test_cvt_s32_neari_f64_pat(double %x) #1 { + %round = call double @llvm.rint.f64(double %x) #0 + %cvt = fptosi double %round to i32 + ret i32 %cvt +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_downi_f32_pat( +; HSAIL: cvt_ftz_downi_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_s32_downi_f32_pat(float %x) #1 { + %round = call float @llvm.floor.f32(float %x) #0 + %cvt = fptosi float %round to i32 + ret i32 %cvt +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_downi_f64_pat( +; HSAIL: cvt_downi_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_s32_downi_f64_pat(double %x) #1 { + %round = call double @llvm.floor.f64(double %x) #0 + %cvt = fptosi double %round to i32 + ret i32 %cvt +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_upi_f32( +; HSAIL: cvt_ftz_upi_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define i32 @test_cvt_s32_upi_f32(float %x) #1 { + %round = call float @llvm.ceil.f32(float %x) #0 + %cvt = fptosi float %round to i32 + ret i32 %cvt +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_upi_f64( +; HSAIL: cvt_upi_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +define i32 @test_cvt_s32_upi_f64(double %x) #1 { + 
%round = call double @llvm.ceil.f64(double %x) #0 + %cvt = fptosi double %round to i32 + ret i32 %cvt +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_zeroi_f32_pat( +; HSAIL-NOT: trunc +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +; HSAIL-NEXT: st_arg_align(4)_u32 +; HSAIL-NEXT: ret +define i32 @test_cvt_s32_zeroi_f32_pat(float %x) #1 { + %round = call float @llvm.trunc.f32(float %x) #0 + %cvt = fptosi float %round to i32 + ret i32 %cvt +} + +; HSAIL-LABEL: {{^}}prog function &test_cvt_s32_zeroi_f64_pat( +; HSAIL-NOT: trunc +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}} +; HSAIL-NEXT: st_arg_align(4)_u32 +; HSAIL-NEXT: ret +define i32 @test_cvt_s32_zeroi_f64_pat(double %x) #1 { + %round = call double @llvm.trunc.f64(double %x) #0 + %cvt = fptosi double %round to i32 + ret i32 %cvt +} + +declare float @llvm.rint.f32(float) #0 +declare double @llvm.rint.f64(double) #0 + +declare float @llvm.floor.f32(float) #0 +declare double @llvm.floor.f64(double) #0 + +declare float @llvm.ceil.f32(float) #0 +declare double @llvm.ceil.f64(double) #0 + +declare float @llvm.trunc.f32(float) #0 +declare double @llvm.trunc.f64(double) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/copy-illegal-type.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/copy-illegal-type.ll @@ -0,0 +1,114 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_copy_v4i8 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind { + %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4 + store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_copy_v4i8_x2 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out0]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out1]; +define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind { + %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4 + store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4 + store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_copy_v4i8_x3 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out0]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out1]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out2]; +define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind { + %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4 + store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4 + store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4 + store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_copy_v4i8_x4 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out0]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out1]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out2]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out3]; +define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> 
addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out3, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &test_copy_v4i8_extra_use
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out0];
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out1];
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
+ %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
+ store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &test_copy_v4i8_x2_extra_use
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out0];
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out1];
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out2];
+define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
+ %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
+ store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &test_copy_v3i8
+; HSAIL-DAG: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in];
+; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out];
+; HSAIL: st_global_align(4)_u16 [[IN]], {{\[}}[[OUT]]{{\]}};
+; HSAIL: shr_u32 [[TRUNC:\$s[0-9]+]], [[IN]], 16;
+; HSAIL: st_global_align(2)_u8 [[TRUNC]], {{\[}}[[OUT]]+2];
+; HSAIL: ret;
+define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
+ %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
+ store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &test_copy_v4i8_volatile_load
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out];
+define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
+ %val = load volatile <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
+ store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &test_copy_v4i8_volatile_store
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out];
+define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
+ %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
+ store volatile <4 x i8> %val, <4 x i8> addrspace(1)*
%out, align 4 + ret void +} Index: test/CodeGen/HSAIL/ctlz.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ctlz.ll @@ -0,0 +1,104 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.ctlz.i32(i32, i1) #0 +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #0 +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #0 + +declare i64 @llvm.ctlz.i64(i64, i1) #0 +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0 +declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0 + + +; HSAIL-LABEL: {{^}}prog function &s_ctlz_i32( +; HSAIL: firstbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) #1 { + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) #0 + store i32 %ctlz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_i32( +; HSAIL: firstbit_u32_u32 +define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #1 { + %val = load i32, i32 addrspace(1)* %valptr + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) #0 + store i32 %ctlz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_v2i32( +; HSAIL: firstbit_u32_u32 +; HSAIL: firstbit_u32_u32 +define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #1 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) #0 + store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_v4i32( +; HSAIL: firstbit_u32_u32 +; HSAIL: firstbit_u32_u32 +; HSAIL: firstbit_u32_u32 +; HSAIL: firstbit_u32_u32 +define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) #1 { + %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr + %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) #0 + store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_ctlz_i64( +; HSAIL: firstbit_u32_u64 [[CTLZ:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL: cvt_u64_u32 [[EXT:\$d[0-9]+]], [[CTLZ]]; +; HSAIL: st_global_align(8)_u64 [[EXT]] +define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) #1 { + %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false) #0 + store i64 %ctlz, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_ctlz_i64_trunc_i32( +; HSAIL: firstbit_u32_u64 [[CTLZ:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL-NOT: cvt +; HSAIL: st_global_align(4)_u32 [[CTLZ]] +define void @s_ctlz_i64_trunc_i32(i32 addrspace(1)* noalias %out, i64 %val) #1 { + %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false) #0 + %ctlz.trunc = trunc i64 %ctlz to i32 + store i32 %ctlz.trunc, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_i64( +; HSAIL: firstbit_u32_u64 +define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #1 { + %val = load i64, i64 addrspace(1)* %valptr + %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false) #0 + store i64 %ctlz, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_v2i64( +; HSAIL: firstbit_u32_u64 +; HSAIL: firstbit_u32_u64 +define void @v_ctlz_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #1 { + %val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr + %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> 
%val, i1 false) #0 + store <2 x i64> %ctlz, <2 x i64> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_v4i64( +; HSAIL: firstbit_u32_u64 +; HSAIL: firstbit_u32_u64 +; HSAIL: firstbit_u32_u64 +; HSAIL: firstbit_u32_u64 +define void @v_ctlz_v4i64(<4 x i64> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %valptr) #1 { + %val = load <4 x i64>, <4 x i64> addrspace(1)* %valptr + %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %val, i1 false) #0 + store <4 x i64> %ctlz, <4 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/ctlz_zero_undef.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ctlz_zero_undef.ll @@ -0,0 +1,104 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.ctlz.i32(i32, i1) #0 +declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) #0 +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) #0 + +declare i64 @llvm.ctlz.i64(i64, i1) #0 +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) #0 +declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) #0 + + +; HSAIL-LABEL: {{^}}prog function &s_ctlz_zero_undef_i32( +; HSAIL: firstbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) #1 { + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) #0 + store i32 %ctlz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_zero_undef_i32( +; HSAIL: firstbit_u32_u32 +define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #1 { + %val = load i32, i32 addrspace(1)* %valptr + %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) #0 + store i32 %ctlz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_zero_undef_v2i32( +; HSAIL: firstbit_u32_u32 +; HSAIL: firstbit_u32_u32 +define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #1 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr + %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) #0 + store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_zero_undef_v4i32( +; HSAIL: firstbit_u32_u32 +; HSAIL: firstbit_u32_u32 +; HSAIL: firstbit_u32_u32 +; HSAIL: firstbit_u32_u32 +define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) #1 { + %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr + %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) #0 + store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_ctlz_zero_undef_i64( +; HSAIL: firstbit_u32_u64 [[CTLZ:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL: cvt_u64_u32 [[EXT:\$d[0-9]+]], [[CTLZ]]; +; HSAIL: st_global_align(8)_u64 [[EXT]] +define void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 %val) #1 { + %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true) #0 + store i64 %ctlz, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_ctlz_zero_undef_i64_trunc_i32( +; HSAIL: firstbit_u32_u64 [[CTLZ:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL-NOT: cvt +; HSAIL: st_global_align(4)_u32 [[CTLZ]] +define void @s_ctlz_zero_undef_i64_trunc_i32(i32 addrspace(1)* noalias %out, i64 %val) #1 { + %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true) #0 + %ctlz.trunc 
= trunc i64 %ctlz to i32 + store i32 %ctlz.trunc, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_zero_undef_i64( +; HSAIL: firstbit_u32_u64 +define void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #1 { + %val = load i64, i64 addrspace(1)* %valptr + %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true) #0 + store i64 %ctlz, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_zero_undef_v2i64( +; HSAIL: firstbit_u32_u64 +; HSAIL: firstbit_u32_u64 +define void @v_ctlz_zero_undef_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #1 { + %val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr + %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %val, i1 true) #0 + store <2 x i64> %ctlz, <2 x i64> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctlz_zero_undef_v4i64( +; HSAIL: firstbit_u32_u64 +; HSAIL: firstbit_u32_u64 +; HSAIL: firstbit_u32_u64 +; HSAIL: firstbit_u32_u64 +define void @v_ctlz_zero_undef_v4i64(<4 x i64> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %valptr) #1 { + %val = load <4 x i64>, <4 x i64> addrspace(1)* %valptr + %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %val, i1 true) #0 + store <4 x i64> %ctlz, <4 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/ctpop.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ctpop.ll @@ -0,0 +1,331 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.ctpop.i32(i32) #0 +declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) #0 +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) #0 +declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) #0 +declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) #0 + +declare i64 @llvm.ctpop.i64(i64) #0 +declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) #0 +declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) #0 +declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) #0 +declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) #0 + +declare i32 @llvm.HSAIL.popcount.u32.b32(i32) #0 + +; HSAIL-LABEL: {{^}}prog function &s_ctpop_i32( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%val]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) #1 { + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) #0 + store i32 %ctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_i32( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) #1 { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) #0 + store i32 %ctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_add_chain_i32 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in1]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in0]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) #1 { + 
%val0 = load i32, i32 addrspace(1)* %in0, align 4 + %val1 = load i32, i32 addrspace(1)* %in1, align 4 + %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) #0 + %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) #0 + %add = add i32 %ctpop0, %ctpop1 + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_v2i32( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) #1 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8 + %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) #0 + store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_v4i32( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) #1 { + %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16 + %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) #0 + store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_v8i32( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) #1 { + %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32 + %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) #0 + store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_v16i32( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) #1 { + %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32 + %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) #0 + store <16 x 
i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_i32_add_inline_constant( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 4; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) #1 { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) #0 + %add = add i32 %ctpop, 4 + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_i32_add_inline_constant_inv( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 4; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) #1 { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) #0 + %add = add i32 4, %ctpop + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_i32_add_literal( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 99999; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) #1 { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) #0 + %add = add i32 %ctpop, 99999 + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_i32_add_var( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%const]; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) #1 { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) #0 + %add = add i32 %ctpop, %const + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_i32_add_var_inv( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%const]; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) #1 { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) #0 + %add = add i32 %const, %ctpop + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_i32_add_vvar_inv( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%constptr]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, 
i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) #1 { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ctpop = call i32 @llvm.ctpop.i32(i32 %val) #0 + %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4 + %const = load i32, i32 addrspace(1)* %gep, align 4 + %add = add i32 %const, %ctpop + store i32 %add, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ctpop_i32_in_br( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%cond]; +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL: cbr_b1 {{\$c[0-9]+}}, @BB13_1; +; HSAIL: br @BB13_3; +; HSAIL: {{^@BB13_1:}} +; HSAIL: ld_global_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: {{^@BB13_3:}} +; HSAIL: st_global_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) #1 { +entry: + %0 = icmp eq i32 %cond, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i32, i32 addrspace(1)* %in + %2 = call i32 @llvm.ctpop.i32(i32 %1) + br label %endif + +else: + %3 = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %4 = load i32, i32 addrspace(1)* %3 + br label %endif + +endif: + %5 = phi i32 [%2, %if], [%4, %else] + store i32 %5, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_ctpop_i64( +; HSAIL: popcount_u32_b64 [[RESULT:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL-NOT: cvt +; HSAIL: st_global_align(4)_u32 [[RESULT]] +define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) #1 { + %ctpop = call i64 @llvm.ctpop.i64(i64 %val) #0 + %truncctpop = trunc i64 %ctpop to i32 + store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ctpop_no_trunc_i64( +; HSAIL: popcount_u32_b64 [[POPCNT:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL: cvt_u64_u32 [[RESULT:\$d[0-9]+]], [[POPCNT]] +; HSAIL: st_global_align(8)_u64 [[RESULT]] +define void @ctpop_no_trunc_i64(i64 addrspace(1)* noalias %out, i64 %val) #1 { + %ctpop = call i64 @llvm.ctpop.i64(i64 %val) #0 + store i64 %ctpop, i64 addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_i64( +define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #1 { + %val = load i64, i64 addrspace(1)* %in, align 8 + %ctpop = call i64 @llvm.ctpop.i64(i64 %val) #0 + %truncctpop = trunc i64 %ctpop to i32 + store i32 %truncctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_ctpop_v2i64( +define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) #1 { + %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) #0 + %truncctpop = trunc <2 x i64> %ctpop to <2 x i32> + store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_ctpop_v4i64( +define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) #1 { + %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) #0 + %truncctpop = trunc <4 x i64> %ctpop to <4 x i32> + store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_v2i64( +define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) #1 { + %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16 + %ctpop = call <2 x i64> 
@llvm.ctpop.v2i64(<2 x i64> %val) #0 + %truncctpop = trunc <2 x i64> %ctpop to <2 x i32> + store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_ctpop_v4i64( +define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) #1 { + %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32 + %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) #0 + %truncctpop = trunc <4 x i64> %ctpop to <4 x i32> + store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ctpop_i64_in_br( +define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) #1 { +entry: + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %if, label %else + +if: + %tmp2 = call i64 @llvm.ctpop.i64(i64 %ctpop_arg) + br label %endif + +else: + %tmp3 = getelementptr i64, i64 addrspace(1)* %in, i32 1 + %tmp4 = load i64, i64 addrspace(1)* %tmp3 + br label %endif + +endif: + %tmp5 = phi i64 [%tmp2, %if], [%tmp4, %else] + store i64 %tmp5, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_popcount_i32( +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%val]; +; HSAIL: popcount_u32_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @legacy_hsail_popcount_i32(i32 addrspace(1)* noalias %out, i32 %val) #1 { + %ctpop = call i32 @llvm.HSAIL.popcount.u32.b32(i32 %val) #0 + store i32 %ctpop, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/cttz.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/cttz.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.cttz.i32(i32, i1) #0 +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) #0 +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) #0 + +declare i64 @llvm.cttz.i64(i64, i1) #0 +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0 +declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) #0 + +; HSAIL-LABEL: {{^}}prog function &s_cttz_i32( +; HSAIL: lastbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_cttz_i32(i32 addrspace(1)* noalias %out, i32 %val) #1 { + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) #0 + store i32 %cttz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_i32( +; HSAIL: lastbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_cttz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #1 { + %val = load i32, i32 addrspace(1)* %valptr + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 false) #0 + store i32 %cttz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_v2i32( +; HSAIL: lastbit_u32_u32 +; HSAIL: lastbit_u32_u32 +define void @v_cttz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #1 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 false) #0 + store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_v4i32( +; HSAIL: lastbit_u32_u32 +; HSAIL: lastbit_u32_u32 +; HSAIL: lastbit_u32_u32 +; HSAIL: lastbit_u32_u32 +define void @v_cttz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) #1 { + 
%val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr + %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 false) #0 + store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_cttz_i64( +; HSAIL: lastbit_u32_u64 [[CTTZ:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL: cvt_u64_u32 [[EXT:\$d[0-9]+]], [[CTTZ]]; +; HSAIL: st_global_align(8)_u64 [[EXT]] +define void @s_cttz_i64(i64 addrspace(1)* noalias %out, i64 %val) #1 { + %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false) #0 + store i64 %cttz, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_cttz_i64_trunc_i32( +; HSAIL: lastbit_u32_u64 [[CTTZ:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL-NOT: cvt +; HSAIL: st_global_align(4)_u32 [[CTTZ]] +define void @s_cttz_i64_trunc_i32(i32 addrspace(1)* noalias %out, i64 %val) #1 { + %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false) #0 + %trunc.cttz = trunc i64 %cttz to i32 + store i32 %trunc.cttz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_i64( +; HSAIL: lastbit_u32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @v_cttz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #1 { + %val = load i64, i64 addrspace(1)* %valptr + %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 false) #0 + store i64 %cttz, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_v2i64( +; HSAIL: lastbit_u32_u64 +; HSAIL: lastbit_u32_u64 +define void @v_cttz_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #1 { + %val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr + %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %val, i1 false) #0 + store <2 x i64> %cttz, <2 x i64> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_v4i64( +; HSAIL: lastbit_u32_u64 +; HSAIL: lastbit_u32_u64 +; HSAIL: lastbit_u32_u64 +; HSAIL: lastbit_u32_u64 +define void @v_cttz_v4i64(<4 x i64> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %valptr) #1 { + %val = load <4 x i64>, <4 x i64> addrspace(1)* %valptr + %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %val, i1 false) #0 + store <4 x i64> %cttz, <4 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/cttz_zero_undef.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/cttz_zero_undef.ll @@ -0,0 +1,103 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.cttz.i32(i32, i1) #0 +declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) #0 +declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) #0 + +declare i64 @llvm.cttz.i64(i64, i1) #0 +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) #0 +declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) #0 + +; HSAIL-LABEL: {{^}}prog function &s_cttz_zero_undef_i32( +; HSAIL: lastbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) #1 { + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) #0 + store i32 %cttz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_zero_undef_i32( +; HSAIL: lastbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #1 { + %val = load i32, i32 addrspace(1)* %valptr + %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) #0 + store i32 %cttz, i32 
addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_zero_undef_v2i32( +; HSAIL: lastbit_u32_u32 +; HSAIL: lastbit_u32_u32 +define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #1 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr + %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) #0 + store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_zero_undef_v4i32( +; HSAIL: lastbit_u32_u32 +; HSAIL: lastbit_u32_u32 +; HSAIL: lastbit_u32_u32 +; HSAIL: lastbit_u32_u32 +define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) #1 { + %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr + %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) #0 + store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_cttz_zero_undef_i64( +; HSAIL: lastbit_u32_u64 [[CTTZ:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL: cvt_u64_u32 [[EXT:\$d[0-9]+]], [[CTTZ]]; +; HSAIL: st_global_align(8)_u64 [[EXT]] +define void @s_cttz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 %val) #1 { + %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 true) #0 + store i64 %cttz, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_cttz_zero_undef_i64_trunc_i32( +; HSAIL: lastbit_u32_u64 [[CTTZ:\$s[0-9]+]], {{\$d[0-9]+}}; +; HSAIL-NOT: cvt +; HSAIL: st_global_align(4)_u32 [[CTTZ]] +define void @s_cttz_zero_undef_i64_trunc_i32(i32 addrspace(1)* noalias %out, i64 %val) #1 { + %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 true) #0 + %trunc.cttz = trunc i64 %cttz to i32 + store i32 %trunc.cttz, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_zero_undef_i64( +; HSAIL: lastbit_u32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @v_cttz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #1 { + %val = load i64, i64 addrspace(1)* %valptr + %cttz = call i64 @llvm.cttz.i64(i64 %val, i1 true) #0 + store i64 %cttz, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_zero_undef_v2i64( +; HSAIL: lastbit_u32_u64 +; HSAIL: lastbit_u32_u64 +define void @v_cttz_zero_undef_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #1 { + %val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr + %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %val, i1 true) #0 + store <2 x i64> %cttz, <2 x i64> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_cttz_zero_undef_v4i64( +; HSAIL: lastbit_u32_u64 +; HSAIL: lastbit_u32_u64 +; HSAIL: lastbit_u32_u64 +; HSAIL: lastbit_u32_u64 +define void @v_cttz_zero_undef_v4i64(<4 x i64> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %valptr) #1 { + %val = load <4 x i64>, <4 x i64> addrspace(1)* %valptr + %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %val, i1 true) #0 + store <4 x i64> %cttz, <4 x i64> addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/cvt_f32_ubyte.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/cvt_f32_ubyte.ll @@ -0,0 +1,181 @@ +; XFAIL: * +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &load_i8_to_f32 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; 
HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out];
+define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
+  %load = load i8, i8 addrspace(1)* %in, align 1
+  %cvt = uitofp i8 %load to float
+  store float %cvt, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &load_v2i8_to_v2f32
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out];
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in, align 1
+  %cvt = uitofp <2 x i8> %load to <2 x float>
+  store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &load_v3i8_to_v3f32
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: pack_u32x2_u32 {{\$d[0-9]+}}, _u32x2(0,0), {{\$s[0-9]+}}, 1;
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: cvt_u64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}};
+define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
+  %cvt = uitofp <3 x i8> %load to <3 x float>
+  store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &load_v4i8_to_v4f32
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out];
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
+  %cvt = uitofp <4 x i8> %load to <4 x float>
+  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &load_v4i8_to_v4f32_2_uses
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out];
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out2];
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9;
+define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
+  %cvt = uitofp <4 x i8> %load to <4 x float>
+  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+  %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
+  store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &load_v7i8_to_v7f32
+; HSAIL: ld_arg_align(4)_u32 
{{\$s[0-9]+}}, [%in]; +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 24; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: pack_u32x2_u32 {{\$d[0-9]+}}, _u32x2(0,0), {{\$s[0-9]+}}, 1; +; HSAIL: cvt_u64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: pack_u32x2_u32 {{\$d[0-9]+}}, _u32x2(0,0), {{\$s[0-9]+}}, 1; + +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 16, 8; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_u64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 255; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: cvt_u64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 65280; +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 8; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: pack_u32x2_u32 {{\$d[0-9]+}}, _u32x2(0,0), {{\$s[0-9]+}}, 1; +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <7 x i8>, <7 x i8> addrspace(1)* %in, align 1 + %cvt = uitofp <7 x i8> %load to <7 x float> + store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v8i8_to_v8f32 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 1 + %cvt = uitofp <8 x i8> %load to <8 x float> + store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i8_zext_inreg_i32_to_f32 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 2; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 255; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { + %load = load i32, i32 addrspace(1)* %in, align 4 + %add = add i32 %load, 2 + %inreg = and i32 %add, 255 + %cvt = uitofp i32 %inreg to float + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i8_zext_inreg_hi1_to_f32 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 65280; +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 8; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { + %load = load i32, i32 addrspace(1)* %in, align 4 + 
%inreg = and i32 %load, 65280 + %shr = lshr i32 %inreg, 8 + %cvt = uitofp i32 %shr to float + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i8_zext_i32_to_f32 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { + %load = load i8, i8 addrspace(1)* %in, align 1 + %ext = zext i8 %load to i32 + %cvt = uitofp i32 %ext to float + store float %cvt, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v4i8_zext_v4i32_to_v4f32 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1 + %ext = zext <4 x i8> %load to <4 x i32> + %cvt = uitofp <4 x i32> %ext to <4 x float> + store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + ret void +} Index: test/CodeGen/HSAIL/dagcombiner-bug-illegal-vec4-int-to-fp.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/dagcombiner-bug-illegal-vec4-int-to-fp.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &sint +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: cvt_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { + %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %sint = load i32, i32 addrspace(1) * %in + %conv = sitofp i32 %sint to float + %tmp0 = insertelement <4 x float> undef, float %conv, i32 0 + %splat = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) { + %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %uint = load i32, i32 addrspace(1) * %in + %conv = uitofp i32 %uint to float + %tmp0 = insertelement <4 x float> undef, float %conv, i32 0 + %splat = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, <4 x float> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/ds-negative-offset-addressing-mode-loop.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ds-negative-offset-addressing-mode-loop.ll @@ -0,0 +1,58 @@ +; RUN: llc -march=hsail -mattr=+gcn < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.get.global.id(i32) #0 +declare void @llvm.HSAIL.barrier() #1 + +; HSAIL-LABEL: {{^}}prog function &signed_ds_offset_addressing_loop()( +; HSAIL: @BB0_1: +; HSAIL: add_u32 [[PTR:\$s[0-9]+]] +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]+}}, 
{{\[}}[[PTR]]+4]; +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+128]; +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+132]; +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+256]; +; HSAIL: cbr_b1 +define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 { +entry: + %x.i = tail call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %mul = shl nsw i32 %x.i, 1 + br label %for.body + +for.body: ; preds = %for.body, %entry + %sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ] + %offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ] + %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + tail call void @llvm.HSAIL.barrier() #1 + %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02 + %tmp = load float, float addrspace(3)* %arrayidx, align 4 + %add1 = add nsw i32 %offset.02, 1 + %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add1 + %tmp1 = load float, float addrspace(3)* %arrayidx2, align 4 + %add3 = add nsw i32 %offset.02, 32 + %arrayidx4 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add3 + %tmp2 = load float, float addrspace(3)* %arrayidx4, align 4 + %add5 = add nsw i32 %offset.02, 33 + %arrayidx6 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add5 + %tmp3 = load float, float addrspace(3)* %arrayidx6, align 4 + %add7 = add nsw i32 %offset.02, 64 + %arrayidx8 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add7 + %tmp4 = load float, float addrspace(3)* %arrayidx8, align 4 + %add9 = fadd float %tmp, %tmp1 + %add10 = fadd float %add9, %tmp2 + %add11 = fadd float %add10, %tmp3 + %add12 = fadd float %add11, %tmp4 + %add13 = fadd float %sum.03, %add12 + %inc = add nsw i32 %k.01, 1 + %add14 = add nsw i32 %offset.02, 97 + %exitcond = icmp eq i32 %inc, 8 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + %tmp5 = sext i32 %x.i to i64 + %arrayidx15 = getelementptr inbounds float, float addrspace(1)* %out, i64 %tmp5 + store float %add13, float addrspace(1)* %arrayidx15, align 4 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { noduplicate nounwind } +attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/CodeGen/HSAIL/empty-function.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/empty-function.ll @@ -0,0 +1,99 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL: {{^}}decl prog function &empty_func_void()(); +; HSAIL: {{^}}decl prog function &__unnamed_1()(); + +; HSAIL: {{^}}decl prog function &empty_func_with_return_type(arg_f32 %ret)(); +; HSAIL: {{^}}decl prog function &__unnamed_2(arg_f32 %ret)(); +; HSAIL: {{^}}decl prog function &empty_func_with_struct_return_type(align(4) arg_u8 %ret[8])(); +; HSAIL: {{^}}decl prog function &empty_func_with_sext_return_type(arg_s32 %ret)(); +; HSAIL: {{^}}decl prog function &empty_func_with_zext_return_type(arg_u32 %ret)(); +; HSAIL: {{^}}decl prog function &unreachable_func_with_return_type(arg_f32 %ret)(arg_u32 %out); + +; HSAIL-LABEL: {{^}}prog function &empty_func_void()() +; HSAIL-NEXT: { +; HSAIL-NOT: ret; +; HSAIL: }; +define void @empty_func_void() 
{ + unreachable +} + + +; HSAIL-NOT: decl +; HSAIL-NOT: global +; HSAIL-LABEL: {{^}}prog function &__unnamed_1()() +; HSAIL-NEXT: { +; HSAIL-NOT: ret; +; HSAIL: }; +define void @0() { + unreachable +} + +; Make sure we aren't printing extra declarations before function +; bodies, or an extra copy of the function name. + +; HSAIL-NOT: decl +; HSAIL-NOT: empty_kernel_void +; HSAIL-NOT: global + +; HSAIL-LABEL: {{^}}prog kernel &empty_kernel_void() +; HSAIL-NEXT: { +; HSAIL-NOT: ret; +; HSAIL: }; +define spir_kernel void @empty_kernel_void() { + unreachable +} + +; HSAIL-NOT: empty_func_with_return_type + +; HSAIL-LABEL: {{^}}prog function &empty_func_with_return_type(arg_f32 %empty_func_with_return_type)() +; HSAIL-NEXT: { +; HSAIL-NOT: ret; +; HSAIL: }; +define float @empty_func_with_return_type() { + unreachable +} + +; HSAIL-LABEL: {{^}}prog function &__unnamed_2(arg_f32 %__unnamed_2)() +; HSAIL-NEXT: { +; HSAIL-NOT: ret; +; HSAIL: }; +define float @1() { + unreachable +} + +%struct.pair = type { i32, i32 } + +; HSAIL-LABEL: {{^}}prog function &empty_func_with_struct_return_type(align(4) arg_u8 %empty_func_with_struct_return_type[8])() +; HSAIL-NEXT: { +; HSAIL-NOT: ret; +; HSAIL: }; +define %struct.pair @empty_func_with_struct_return_type() { + unreachable +} + +; HSAIL-LABEL: {{^}}prog function &empty_func_with_sext_return_type(arg_s32 %empty_func_with_sext_return_type)() +; HSAIL-NEXT: { +; HSAIL-NOT: ret; +; HSAIL: }; +define signext i8 @empty_func_with_sext_return_type() { + unreachable +} + +; HSAIL-LABEL: {{^}}prog function &empty_func_with_zext_return_type(arg_u32 %empty_func_with_zext_return_type)() +; HSAIL-NEXT: { +; HSAIL-NOT: ret; +; HSAIL: }; +define zeroext i8 @empty_func_with_zext_return_type() { + unreachable +} + +; HSAIL-LABEL: {{^}}prog function &unreachable_func_with_return_type(arg_f32 %unreachable_func_with_return_type)(arg_u32 %out) +; HSAIL-NEXT: { +; HSAIL: ld_arg_align(4)_u32 +; HSAIL-NEXT: st_global_align(4)_u32 +; HSAIL-NEXT: }; +define float @unreachable_func_with_return_type(i32 addrspace(1)* %out) { + store i32 123, i32 addrspace(1)* %out + unreachable +} Index: test/CodeGen/HSAIL/empty-functions.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/empty-functions.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL: decl prog function &empty()(); +; HSAIL: decl prog function &empty_w_arg()(arg_u32 %x); + +; HSAIL: decl prog function &empty_w_args()( +; HSAIL-NEXT: arg_u32 %x, +; HSAIL-NEXT: arg_u32 %y); + +; HSAIL: decl prog function &unreachable_empty()(); + +; HSAIL: prog function &empty()() +; HSAIL: ret; +; HSAIL-NEXT: }; +define void @empty() { + ret void +} + +; HSAIL: prog function &empty_w_arg()(arg_u32 %x) +define void @empty_w_arg(i32 %x) { + ret void +} + +; HSAIL: prog function &empty_w_args()( +; HSAIL-NEXT: arg_u32 %x, +; HSAIL-NEXT: arg_u32 %y) +define void @empty_w_args(i32 %x, i32 %y) { + ret void +} + +; HSAIL: prog function &unreachable_empty()() +; HSAIL-NOT: ret +; HSAIL: }; +define void @unreachable_empty() { + unreachable +} Index: test/CodeGen/HSAIL/ext-args.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ext-args.ll @@ -0,0 +1,129 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &i8_arg( +; HSAIL: ld_arg_u8 [[REG:\$s[0-9]+]], [%y]; +; HSAIL: st_global_u8 [[REG]] +define void @i8_arg(i8 %x, i8 %y, i8 
addrspace(1)* %out) { + store i8 %y, i8 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i8_arg_sext( +; HSAIL: ld_arg_u8 [[REG0:\$s[0-9]+]], [%y]; +; HSAIL: shl_u32 [[REG1:\$s[0-9]+]], [[REG0]], 24; +; HSAIL: shr_s32 [[REG2:\$s[0-9]+]], [[REG1]], 24; +; HSAIL: st_global_align(4)_u32 [[REG2]] +define void @i8_arg_sext(i8 %x, i8 %y, i32 addrspace(1)* %out) { + %exty = sext i8 %y to i32 + store i32 %exty, i32 addrspace(1)* %out + ret void +} + +; FIXME: The and is redundant +; HSAIL-LABEL: {{^}}prog function &i8_arg_zext( +; HSAIL: ld_arg_u8 [[REG0:\$s[0-9]+]], [%y]; +; HSAIL: and_b32 [[REG1:\$s[0-9]+]], [[REG0]], 255; +; HSAIL: st_global_align(4)_u32 [[REG1]] +define void @i8_arg_zext(i8 %x, i8 %y, i32 addrspace(1)* %out) { + %exty = zext i8 %y to i32 + store i32 %exty, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i8_sext_arg_ext( +; HSAIL: ld_arg_s8 [[REG:\$s[0-9]+]], [%y]; +; HSAIL-NEXT: st_global_align(4)_u32 [[REG]], +define void @i8_sext_arg_ext(i8 signext %x, i8 signext %y, i32 addrspace(1)* %out) { + %exty = sext i8 %y to i32 + store i32 %exty, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i8_zext_arg_ext( +; HSAIL: ld_arg_u8 [[REG:\$s[0-9]+]], [%y]; +; HSAIL-NEXT: st_global_align(4)_u32 [[REG]], +define void @i8_zext_arg_ext(i8 zeroext %x, i8 zeroext %y, i32 addrspace(1)* %out) { + %exty = zext i8 %y to i32 + store i32 %exty, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i8_sext_arg( +; HSAIL: ld_arg_s8 [[REG:\$s[0-9]+]], [%y]; +; HSAIL-NEXT: st_global_u8 [[REG]], +define void @i8_sext_arg(i8 signext %x, i8 signext %y, i8 addrspace(1)* %out) { + store i8 %y, i8 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i8_zext_arg( +; HSAIL: ld_arg_u8 [[REG:\$s[0-9]+]], [%y]; +; HSAIL-NEXT: st_global_u8 [[REG]], +define void @i8_zext_arg(i8 zeroext %x, i8 zeroext %y, i8 addrspace(1)* %out) { + store i8 %y, i8 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i16_arg( +; HSAIL: ld_arg_align(2)_u16 [[REG:\$s[0-9]+]], [%y]; +; HSAIL: st_global_align(2)_u16 [[REG]] +define void @i16_arg(i16 %x, i16 %y, i16 addrspace(1)* %out) { + store i16 %y, i16 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i16_arg_sext( +; HSAIL: ld_arg_align(2)_u16 [[REG0:\$s[0-9]+]], [%y]; +; HSAIL: shl_u32 [[REG1:\$s[0-9]+]], [[REG0]], 16; +; HSAIL: shr_s32 [[REG2:\$s[0-9]+]], [[REG1]], 16; +; HSAIL: st_global_align(4)_u32 [[REG2]] +define void @i16_arg_sext(i16 %x, i16 %y, i32 addrspace(1)* %out) { + %exty = sext i16 %y to i32 + store i32 %exty, i32 addrspace(1)* %out + ret void +} + +; FIXME: The and is redundant +; HSAIL-LABEL: {{^}}prog function &i16_arg_zext( +; HSAIL: ld_arg_align(2)_u16 [[REG0:\$s[0-9]+]], [%y]; +; HSAIL: and_b32 [[REG1:\$s[0-9]+]], [[REG0]], 65535; +; HSAIL: st_global_align(4)_u32 [[REG1]] +define void @i16_arg_zext(i16 %x, i16 %y, i32 addrspace(1)* %out) { + %exty = zext i16 %y to i32 + store i32 %exty, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i16_sext_arg_ext( +; HSAIL: ld_arg_align(2)_s16 [[REG:\$s[0-9]+]], [%y]; +; HSAIL-NEXT: st_global_align(4)_u32 [[REG]], +define void @i16_sext_arg_ext(i16 signext %x, i16 signext %y, i32 addrspace(1)* %out) { + %exty = sext i16 %y to i32 + store i32 %exty, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i16_zext_arg_ext( +; HSAIL: ld_arg_align(2)_u16 [[REG:\$s[0-9]+]], [%y]; +; HSAIL-NEXT: st_global_align(4)_u32 [[REG]], 
+define void @i16_zext_arg_ext(i16 zeroext %x, i16 zeroext %y, i32 addrspace(1)* %out) { + %exty = zext i16 %y to i32 + store i32 %exty, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i16_sext_arg( +; HSAIL: ld_arg_align(2)_s16 [[REG:\$s[0-9]+]], [%y]; +; HSAIL-NEXT: st_global_align(2)_u16 [[REG]], +define void @i16_sext_arg(i16 signext %x, i16 signext %y, i16 addrspace(1)* %out) { + store i16 %y, i16 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i16_zext_arg( +; HSAIL: ld_arg_align(2)_u16 [[REG:\$s[0-9]+]], [%y]; +; HSAIL-NEXT: st_global_align(2)_u16 [[REG]], +define void @i16_zext_arg(i16 zeroext %x, i16 zeroext %y, i16 addrspace(1)* %out) { + store i16 %y, i16 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/extension-features.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/extension-features.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=NONE %s +; RUN: llc -march=hsail -mattr=+gcn < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=hsail -mattr=+images < %s | FileCheck -check-prefix=IMAGES %s +; RUN: llc -march=hsail -mattr=+gcn,+images < %s | FileCheck -check-prefix=GCN -check-prefix=IMAGES %s +; RUN: llc -march=hsail -mcpu=kaveri < %s | FileCheck -check-prefix=GCN -check-prefix=IMAGES %s + +; NONE-NOT: extension +; GCN: extension "amd:gcn"; +; IMAGES: extension "IMAGE"; + +; We must have at least one function for subtarget features to be +; detected for the module. +define void @foo() { + ret void +} Index: test/CodeGen/HSAIL/extract_vector_elt_i16.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/extract_vector_elt_i16.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &extract_vector_elt_v2i16 +; HSAIL: ld_arg +; HSAIL: ld_arg +; HSAIL: ld_arg +; HSAIL: st_global +; HSAIL: st_global +; HSAIL: ret; +define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind { + %p0 = extractelement <2 x i16> %foo, i32 0 + %p1 = extractelement <2 x i16> %foo, i32 1 + %out1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 + store i16 %p1, i16 addrspace(1)* %out, align 2 + store i16 %p0, i16 addrspace(1)* %out1, align 2 + ret void +} + +; FUNC-LABEL: {{^}}prog function &extract_vector_elt_v4i16 +; HSAIL: ld_arg +; HSAIL: ld_arg +; HSAIL: ld_arg +; HSAIL: st_global +; HSAIL: st_global +; HSAIL: ret; +define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind { + %p0 = extractelement <4 x i16> %foo, i32 0 + %p1 = extractelement <4 x i16> %foo, i32 2 + %out1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 + store i16 %p1, i16 addrspace(1)* %out, align 2 + store i16 %p0, i16 addrspace(1)* %out1, align 2 + ret void +} Index: test/CodeGen/HSAIL/fabs.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fabs.ll @@ -0,0 +1,167 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @fabsf(float) readnone +declare float @llvm.fabs.f32(float) readnone +declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone +declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone + +declare double @fabs(double) readnone +declare double @llvm.fabs.f64(double) readnone +declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone +declare <4 x double> 
@llvm.fabs.v4f64(<4 x double>) readnone + +declare float @llvm.HSAIL.abs.f32(float) readnone +declare double @llvm.HSAIL.abs.f64(double) readnone + + +; DAGCombiner will transform: +; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF)) +; unless isFabsFree returns true + +; HSAIL-LABEL: {{^}}prog function &fabs_fn_free( +; HSAIL: abs_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) { + %bc= bitcast i32 %in to float + %fabs = call float @fabsf(float %bc) + store float %fabs, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_free( +; HSAIL: abs_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fabs_free(float addrspace(1)* %out, i32 %in) { + %bc= bitcast i32 %in to float + %fabs = call float @llvm.fabs.f32(float %bc) + store float %fabs, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_f32( +; HSAIL: abs_f32 +define void @fabs_f32(float addrspace(1)* %out, float %in) { + %fabs = call float @llvm.fabs.f32(float %in) + store float %fabs, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_v2f32( +; HSAIL: abs_f32 +; HSAIL: abs_f32 +define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { + %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) + store <2 x float> %fabs, <2 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_v4f32( +; HSAIL: abs_f32 +; HSAIL: abs_f32 +; HSAIL: abs_f32 +; HSAIL: abs_f32 +define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { + %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) + store <4 x float> %fabs, <4 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_fn_fold( +; HSAIL: abs_f32 +; HSAIL: mul_ftz_f32 +define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) { + %fabs = call float @fabsf(float %in0) + %fmul = fmul float %fabs, %in1 + store float %fmul, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_fold( +; HSAIL: abs_f32 +; HSAIL: mul_ftz_f32 +define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) { + %fabs = call float @llvm.fabs.f32(float %in0) + %fmul = fmul float %fabs, %in1 + store float %fmul, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_f64( +; HSAIL: abs_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fabs_f64(double addrspace(1)* %out, double %in) { + %fabs = call double @llvm.fabs.f64(double %in) + store double %fabs, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_v2f64( +; HSAIL: abs_f64 +; HSAIL: abs_f64 +define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) { + %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in) + store <2 x double> %fabs, <2 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_v4f64( +; HSAIL: abs_f64 +; HSAIL: abs_f64 +; HSAIL: abs_f64 +; HSAIL: abs_f64 +define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) { + %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in) + store <4 x double> %fabs, <4 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_fold_f64( +; HSAIL: abs_f64 +; HSAIL: mul_f64 +define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) { + %fabs = call double @llvm.fabs.f64(double %in0) + %fmul = fmul double %fabs, %in1 + store double %fmul, double 
addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_fn_fold_f64( +; HSAIL: abs_f64 +; HSAIL: mul_f64 +define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) { + %fabs = call double @fabs(double %in0) + %fmul = fmul double %fabs, %in1 + store double %fmul, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_free_f64( +; HSAIL: abs_f64 +define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) { + %bc= bitcast i64 %in to double + %fabs = call double @llvm.fabs.f64(double %bc) + store double %fabs, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fabs_fn_free_f64( +; HSAIL: abs_f64 +define void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) { + %bc= bitcast i64 %in to double + %fabs = call double @fabs(double %bc) + store double %fabs, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_abs_f32( +; HSAIL: abs_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @legacy_hsail_abs_f32(float addrspace(1)* %out, float %in) { + %fabs = call float @llvm.HSAIL.abs.f32(float %in) + store float %fabs, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_abs_f64( +; HSAIL: abs_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @legacy_hsail_abs_f64(double addrspace(1)* %out, double %in) { + %fabs = call double @llvm.HSAIL.abs.f64(double %in) + store double %fabs, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fadd.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fadd.ll @@ -0,0 +1,112 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck %s -check-prefix=HSAIL -check-prefix=FUNC + +; FUNC-LABEL: {{^}}prog function &fadd_f32 +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) { + %add = fadd float %a, %b + store float %add, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_v2f32 +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { + %add = fadd <2 x float> %a, %b + store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_v4f32 +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16 + %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16 + %result = fadd <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_v8f32 +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; 
+; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) { + %add = fadd <8 x float> %a, %b + store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32 + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_f64 +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fadd double %r0, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_v2f64 +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) { + %add = fadd <2 x double> %a, %b + store <2 x double> %add, <2 x double> addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_v4f64 +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fadd_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1 + %a = load <4 x double>, <4 x double> addrspace(1)* %in, align 16 + %b = load <4 x double>, <4 x double> addrspace(1)* %b_ptr, align 16 + %result = fadd <4 x double> %a, %b + store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_v8f64 +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fadd_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) { + %add = fadd <8 x double> %a, %b + store <8 x double> %add, <8 x double> addrspace(1)* %out, align 32 + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_f32_r_i +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F44800000; +define void @fadd_f32_r_i(float addrspace(1)* %out, float %a, float %b) { + %add = fadd float %a, 1024.0 + store float %add, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &fadd_f64_r_i +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 0D4090000000000000; +define void @fadd_f64_r_i(double addrspace(1)* %out, double %a, double %b) { + %add = fadd double %a, 1024.0 + store double %add, double addrspace(1)* %out, align 4 + ret void +} Index: test/CodeGen/HSAIL/fceil.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fceil.ll @@ -0,0 +1,188 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog 
function &fceil_ftz_f32( +; HSAIL: ceil_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @fceil_ftz_f32(float addrspace(1)* %out, float %x) { + %y = call float @llvm.ceil.f32(float %x) #0 + store float %y, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v2f32( +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) { + %y = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x) #0 + store <2 x float> %y, <2 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v3f32( +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) { + %y = call <3 x float> @llvm.ceil.v3f32(<3 x float> %x) #0 + store <3 x float> %y, <3 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v4f32( +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) { + %y = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) #0 + store <4 x float> %y, <4 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v8f32( +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) { + %y = call <8 x float> @llvm.ceil.v8f32(<8 x float> %x) #0 + store <8 x float> %y, <8 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v16f32( +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +; HSAIL: ceil_ftz_f32 +define void @fceil_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) { + %y = call <16 x float> @llvm.ceil.v16f32(<16 x float> %x) #0 + store <16 x float> %y, <16 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_f64( +; HSAIL: ceil_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @fceil_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.ceil.f64(double %x) #0 + store double %y, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v2f64( +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) #0 + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v3f64( +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { + %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) #0 + store <3 x double> %y, <3 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v4f64( +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) #0 + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog 
function &fceil_v8f64( +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) #0 + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fceil_v16f64( +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +; HSAIL: ceil_f64 +define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) #0 + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_ceil_f32( +; HSAIL: ceil_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @legacy_hsail_ceil_f32(float addrspace(1)* %out, float %in) #1 { + %tmp = call float @llvm.HSAIL.ceil.f32(float %in) #0 + store float %tmp, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_ceil_f64( +; HSAIL: ceil_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @legacy_hsail_ceil_f64(double addrspace(1)* %out, double %in) #1 { + %tmp = call double @llvm.HSAIL.ceil.f64(double %in) #0 + store double %tmp, double addrspace(1)* %out + ret void +} + +declare float @llvm.ceil.f32(float) #0 +declare <2 x float> @llvm.ceil.v2f32(<2 x float>) #0 +declare <3 x float> @llvm.ceil.v3f32(<3 x float>) #0 +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) #0 +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) #0 +declare <16 x float> @llvm.ceil.v16f32(<16 x float>) #0 + +declare double @llvm.ceil.f64(double) #0 +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) #0 +declare <3 x double> @llvm.ceil.v3f64(<3 x double>) #0 +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) #0 +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) #0 +declare <16 x double> @llvm.ceil.v16f64(<16 x double>) #0 + +declare float @llvm.HSAIL.ceil.f32(float) #0 +declare double @llvm.HSAIL.ceil.f64(double) #0 + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/fcmp-cnd.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fcmp-cnd.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test +; HSAIL: cmp_eq_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 2, 3; +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { + %tmp0 = load float, float addrspace(1)* %in + %cmp = fcmp oeq float %tmp0, 0.000000e+00 + %value = select i1 %cmp, i32 2, i32 3 + store i32 %value, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fcmp-cnde-int-args.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fcmp-cnde-int-args.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test +; HSAIL: cmp_eq_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { 
+ %tmp0 = load float, float addrspace(1)* %in + %cmp = fcmp oeq float %tmp0, 0.000000e+00 + %value = select i1 %cmp, i32 -1, i32 0 + store i32 %value, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fcmp.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fcmp.ll @@ -0,0 +1,289 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fcmp_olt_f32 +; HSAIL: cmp_lt_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp olt float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ole_f32 +; HSAIL: cmp_le_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp ole float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ogt_f32 +; HSAIL: cmp_gt_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp ogt float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_oge_f32 +; HSAIL: cmp_ge_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp oge float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_one_f32 +; HSAIL: cmp_ne_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_one_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp one float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_oeq_f32 +; HSAIL: cmp_eq_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_oeq_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp oeq float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ult_f32 +; HSAIL: cmp_ltu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float 
addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp ult float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ule_f32 +; HSAIL: cmp_leu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp ule float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ugt_f32 +; HSAIL: cmp_gtu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp ugt float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_uge_f32 +; HSAIL: cmp_geu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp uge float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_une_f32 +; HSAIL: cmp_neu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_une_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp une float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ueq_f32 +; HSAIL: cmp_equ_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 +define void @fcmp_ueq_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = fcmp ueq float %r0, %r1 + %r3 = select i1 %r2, float %r0, float %r1 + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_olt_f64 +; HSAIL: cmp_lt_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp olt double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ole_f64 +; HSAIL: cmp_le_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp ole double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}prog function &fcmp_ogt_f64 +; HSAIL: cmp_gt_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp ogt double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_oge_f64 +; HSAIL: cmp_ge_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp oge double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_one_f64 +; HSAIL: cmp_ne_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_one_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp one double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_oeq_f64 +; HSAIL: cmp_eq_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_oeq_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp oeq double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ult_f64 +; HSAIL: cmp_ltu_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp ult double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ule_f64 +; HSAIL: cmp_leu_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp ule double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ugt_f64 +; HSAIL: cmp_gtu_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp ugt double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_uge_f64 +; HSAIL: cmp_geu_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_uge_f64(double addrspace(1)* %out, double 
addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp uge double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_une_f64 +; HSAIL: cmp_neu_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_une_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp une double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fcmp_ueq_f64 +; HSAIL: cmp_equ_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b64 +define void @fcmp_ueq_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fcmp ueq double %r0, %r1 + %r3 = select i1 %r2, double %r0, double %r1 + store double %r3, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fconst64.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fconst64.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fconst_f64 +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 0D4014000000000000; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) { + %r1 = load double, double addrspace(1)* %in + %r2 = fadd double %r1, 5.000000e+00 + store double %r2, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fcopysign.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fcopysign.ll @@ -0,0 +1,96 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.copysign.f32(float, float) #0 +declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) #0 +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) #0 + +declare double @llvm.copysign.f64(double, double) #0 +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0 +declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) #0 + +declare float @llvm.HSAIL.copysign.f32(float, float) #0 +declare double @llvm.HSAIL.copysign.f64(double, double) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_copysign_f32( +; HSAIL: copysign_f32 [[RESULT:\$s[0-9]+]] +; HSAIL: st_global_align(4)_f32 [[RESULT]], +; HSAIL: ret; +define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) #1 { + %result = call float @llvm.copysign.f32(float %mag, float %sign) #0 + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_copysign_v2f32( +; HSAIL: copysign_f32 +; HSAIL: copysign_f32 +define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %mag, <2 x float> %sign) #1 { + %result = call <2 x float> @llvm.copysign.v2f32(<2 x float> %mag, <2 x float> %sign) #0 + store <2 x float> %result, <2 x float> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function 
&test_copysign_v4f32( +; HSAIL: copysign_f32 +; HSAIL: copysign_f32 +; HSAIL: copysign_f32 +; HSAIL: copysign_f32 +define void @test_copysign_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %mag, <4 x float> %sign) #1 { + %result = call <4 x float> @llvm.copysign.v4f32(<4 x float> %mag, <4 x float> %sign) #0 + store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_copysign_f64( +; HSAIL: copysign_f64 [[RESULT:\$d[0-9]+]] +; HSAIL: st_global_align(8)_f64 [[RESULT]], +; HSAIL: ret; +define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) #1 { + %result = call double @llvm.copysign.f64(double %mag, double %sign) #0 + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_copysign_v2f64( +; HSAIL: copysign_f64 +; HSAIL: copysign_f64 +define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) #1 { + %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign) #0 + store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_copysign_v4f64( +; HSAIL: copysign_f64 +; HSAIL: copysign_f64 +; HSAIL: copysign_f64 +; HSAIL: copysign_f64 +define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) #1 { + %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign) #0 + store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_hsail_copysign_f32( +; HSAIL: copysign_f32 [[RESULT:\$s[0-9]+]] +; HSAIL: st_global_align(4)_f32 [[RESULT]], +; HSAIL: ret; +define void @test_legacy_hsail_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) #1 { + %result = call float @llvm.HSAIL.copysign.f32(float %mag, float %sign) #0 + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_hsail_copysign_f64( +; HSAIL: copysign_f64 [[RESULT:\$d[0-9]+]] +; HSAIL: st_global_align(8)_f64 [[RESULT]], +; HSAIL: ret; +define void @test_legacy_hsail_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) #1 { + %result = call double @llvm.HSAIL.copysign.f64(double %mag, double %sign) #0 + store double %result, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/fdiv.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fdiv.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fdiv_f32 +; HSAIL: div_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) { + %tmp0 = fdiv float %a, %b + store float %tmp0, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fdiv_v2f32 +; HSAIL: div_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { + %tmp0 = fdiv <2 x float> %a, %b + store <2 x float> %tmp0, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fdiv_v4f32 +; HSAIL: div_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, 
<4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float>, <4 x float> addrspace(1) * %in + %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr + %result = fdiv <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fdiv_f64 +; HSAIL: div_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fdiv_f64(double addrspace(1)* %out, double %a, double %b) { + %tmp0 = fdiv double %a, %b + store double %tmp0, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fdiv_v2f64 +; HSAIL: div_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) { + %tmp0 = fdiv <2 x double> %a, %b + store <2 x double> %tmp0, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fdiv_v4f64 +; HSAIL: div_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1 + %a = load <4 x double>, <4 x double> addrspace(1) * %in + %b = load <4 x double>, <4 x double> addrspace(1) * %b_ptr + %result = fdiv <4 x double> %a, %b + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/ffloor.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ffloor.ll @@ -0,0 +1,137 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &floor_f32( +; HSAIL: floor_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @floor_f32(float addrspace(1)* %out, float %in) { + %tmp = call float @llvm.floor.f32(float %in) #0 + store float %tmp, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &floor_v2f32( +; HSAIL: floor_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +; HSAIL: floor_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @floor_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { + %tmp = call <2 x float> @llvm.floor.v2f32(<2 x float> %in) #0 + store <2 x float> %tmp, <2 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &floor_v4f32( +; HSAIL: floor_ftz_f32 +; HSAIL: floor_ftz_f32 +; HSAIL: floor_ftz_f32 +; HSAIL: floor_ftz_f32 +define void @floor_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { + %tmp = call <4 x float> @llvm.floor.v4f32(<4 x float> %in) #0 + store <4 x float> %tmp, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ffloor_f64( +; HSAIL: floor_f64 +define void @ffloor_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.floor.f64(double %x) #0 + store double %y, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ffloor_v2f64( +; HSAIL: floor_f64 +; HSAIL: floor_f64 +define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) #0 + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; FIXME-FUNC-LABEL: {{^}}prog function &ffloor_v3f64( +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { + %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) #0 + store <3 x double> %y, <3 x double> addrspace(1)* %out + 
ret void +} + +; FUNC-LABEL: {{^}}prog function &ffloor_v4f64( +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) #0 + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ffloor_v8f64( +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) #0 + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ffloor_v16f64( +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +; HSAIL: floor_f64 +define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) #0 + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_floor_f32( +; HSAIL: floor_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @legacy_hsail_floor_f32(float addrspace(1)* %out, float %in) #1 { + %tmp = call float @llvm.HSAIL.floor.f32(float %in) #0 + store float %tmp, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_floor_f64( +; HSAIL: floor_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @legacy_hsail_floor_f64(double addrspace(1)* %out, double %in) #1 { + %tmp = call double @llvm.HSAIL.floor.f64(double %in) #0 + store double %tmp, double addrspace(1)* %out + ret void +} + +declare float @llvm.floor.f32(float) #0 +declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0 +declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0 + +declare double @llvm.floor.f64(double) #0 +declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0 +declare <3 x double> @llvm.floor.v3f64(<3 x double>) #0 +declare <4 x double> @llvm.floor.v4f64(<4 x double>) #0 +declare <8 x double> @llvm.floor.v8f64(<8 x double>) #0 +declare <16 x double> @llvm.floor.v16f64(<16 x double>) #0 + +declare float @llvm.HSAIL.floor.f32(float) #0 +declare double @llvm.HSAIL.floor.f64(double) #0 + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/flat-atomics.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/flat-atomics.ll @@ -0,0 +1,484 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xchg_ret_i32( +; HSAIL: atomic_exch_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_xchg_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xchg_monotonic_ret_i32( +; HSAIL: atomic_exch_rlx_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_xchg_monotonic_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(4)* 
%ptr, i32 4 monotonic + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xchg_acquire_ret_i32( +; HSAIL: atomic_exch_scacq_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_xchg_acquire_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(4)* %ptr, i32 4 acquire + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xchg_release_ret_i32( +; HSAIL: atomic_exch_screl_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_xchg_release_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(4)* %ptr, i32 4 release + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xchg_acq_rel_ret_i32( +; HSAIL: atomic_exch_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_xchg_acq_rel_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(4)* %ptr, i32 4 acq_rel + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xchg_ret_i32_offset( +; HSAIL: atomic_exch_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_xchg_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw xchg i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_add_ret_i32( +; HSAIL: atomic_add_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_add_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_add_ret_i32_offset( +; HSAIL: atomic_add_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_add_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_inc_ret_i32( +; HSAIL: atomic_add_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 1; +define void @flat_atomic_inc_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(4)* %ptr, i32 1 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_inc_ret_i32_offset( +; HSAIL: atomic_add_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 1; +define void @flat_atomic_inc_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(4)* %gep, i32 1 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_sub_ret_i32( +; HSAIL: atomic_sub_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_sub_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw sub i32 
addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_sub_ret_i32_offset( +; HSAIL: atomic_sub_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_sub_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_dec_ret_i32( +; HSAIL: atomic_sub_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 1; +define void @flat_atomic_dec_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(4)* %ptr, i32 1 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_dec_ret_i32_offset( +; HSAIL: atomic_sub_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 1; +define void @flat_atomic_dec_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(4)* %gep, i32 1 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_and_ret_i32( +; HSAIL: atomic_and_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_and_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw and i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_and_ret_i32_offset( +; HSAIL: atomic_and_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_and_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw and i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_or_ret_i32( +; HSAIL: atomic_or_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_or_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw or i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_or_ret_i32_offset( +; HSAIL: atomic_or_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_or_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw or i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xor_ret_i32( +; HSAIL: atomic_xor_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_xor_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw xor i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xor_ret_i32_offset( +; HSAIL: atomic_xor_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_xor_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = 
getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw xor i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; FIXME: There is no atomic nand instruction, so we somehow need to expand this. +; XHSAIL-LABEL: {{^}}prog function &flat_atomic_nand_ret_i32( +; define void @flat_atomic_nand_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { +; %result = atomicrmw nand i32 addrspace(4)* %ptr, i32 4 seq_cst +; store i32 %result, i32 addrspace(4)* %out, align 4 +; ret void +; } + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_min_ret_i32( +; HSAIL: atomic_min_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_min_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw min i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_min_ret_i32_offset( +; HSAIL: atomic_min_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_min_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw min i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_max_ret_i32( +; HSAIL: atomic_max_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_max_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw max i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_max_ret_i32_offset( +; HSAIL: atomic_max_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_max_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw max i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_umin_ret_i32( +; HSAIL: atomic_min_scar_system_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_umin_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw umin i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_umin_ret_i32_offset( +; HSAIL: atomic_min_scar_system_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_umin_ret_i32_offset(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw umin i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_umax_ret_i32( +; HSAIL: atomic_max_scar_system_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_umax_ret_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw umax i32 addrspace(4)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_umax_ret_i32_offset( +; HSAIL: atomic_max_scar_system_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_umax_ret_i32_offset(i32 addrspace(4)* %out, 
i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw umax i32 addrspace(4)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(4)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xchg_noret_i32( +; HSAIL: atomic_exch_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @flat_atomic_xchg_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xchg_noret_i32_offset( +; HSAIL: atomic_exch_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_xchg_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw xchg i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_add_noret_i32( +; HSAIL: atomicnoret_add_scar_system_s32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_add_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_add_noret_i32_offset( +; HSAIL: atomicnoret_add_scar_system_s32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_add_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_inc_noret_i32( +define void @flat_atomic_inc_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(4)* %ptr, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_inc_noret_i32_offset( +define void @flat_atomic_inc_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(4)* %gep, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_inc_noret_i32_bad_si_offset( +define void @flat_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(4)* %ptr, i32 %a, i32 %b) nounwind { + %sub = sub i32 %a, %b + %add = add i32 %sub, 4 + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 %add + %result = atomicrmw add i32 addrspace(4)* %gep, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_sub_noret_i32( +; HSAIL: atomicnoret_sub_scar_system_s32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_sub_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_sub_noret_i32_offset( +; HSAIL: atomicnoret_sub_scar_system_s32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_sub_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_dec_noret_i32( +define void @flat_atomic_dec_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(4)* %ptr, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_dec_noret_i32_offset( +define void @flat_atomic_dec_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(4)* %gep, i32 1 seq_cst + ret void +} + +; 
HSAIL-LABEL: {{^}}prog function &flat_atomic_and_noret_i32( +; HSAIL: atomicnoret_and_scar_system_b32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_and_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw and i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_and_noret_i32_offset( +; HSAIL: atomicnoret_and_scar_system_b32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_and_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw and i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_or_noret_i32( +; HSAIL: atomicnoret_or_scar_system_b32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_or_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw or i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_or_noret_i32_offset( +; HSAIL: atomicnoret_or_scar_system_b32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_or_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw or i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xor_noret_i32( +; HSAIL: atomicnoret_xor_scar_system_b32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_xor_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw xor i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_xor_noret_i32_offset( +; HSAIL: atomicnoret_xor_scar_system_b32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_xor_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw xor i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; FIXME: There is no atomic nand instruction, so we somehow need to expand this. +; XHSAIL-LABEL: {{^}}prog function &flat_atomic_nand_noret_i32( 
+; define void @flat_atomic_nand_noret_i32(i32 addrspace(4)* %ptr) nounwind { +; %result = atomicrmw nand i32 addrspace(4)* %ptr, i32 4 seq_cst +; ret void +; } + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_min_noret_i32( +; HSAIL: atomicnoret_min_scar_system_s32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_min_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw min i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_min_noret_i32_offset( +; HSAIL: atomicnoret_min_scar_system_s32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_min_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw min i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_max_noret_i32( +; HSAIL: atomicnoret_max_scar_system_s32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_max_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw max i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_max_noret_i32_offset( +; HSAIL: atomicnoret_max_scar_system_s32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_max_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw max i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_umin_noret_i32( +; HSAIL: atomicnoret_min_scar_system_u32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_umin_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw umin i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_umin_noret_i32_offset( +; HSAIL: atomicnoret_min_scar_system_u32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_umin_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw umin i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_umax_noret_i32( +; HSAIL: atomicnoret_max_scar_system_u32 [{{\$s[0-9]+}}], 4; +define void @flat_atomic_umax_noret_i32(i32 addrspace(4)* %ptr) nounwind { + %result = atomicrmw umax i32 addrspace(4)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_umax_noret_i32_offset( +; HSAIL: atomicnoret_max_scar_system_u32 [{{\$s[0-9]+}}+16], 4; +define void @flat_atomic_umax_noret_i32_offset(i32 addrspace(4)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(4)* %ptr, i32 4 + %result = atomicrmw umax i32 addrspace(4)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_load_acquire_ret_i32( +; HSAIL: atomic_ld_scacq_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @flat_atomic_load_acquire_ret_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(4)* %ptr acquire, align 4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_load_seq_cst_ret_i32( +; HSAIL: memfence_screl_system; +; HSAIL: atomic_ld_scacq_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @flat_atomic_load_seq_cst_ret_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(4)* %ptr seq_cst, align 4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function 
&flat_atomic_load_monotonic_ret_i32( +; HSAIL: atomic_ld_rlx_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @flat_atomic_load_monotonic_ret_i32(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(4)* %ptr monotonic, align 4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_store_release_i32( +; HSAIL: atomicnoret_st_screl_system_b32 [{{\$s[0-9]+}}], 9; +define void @flat_atomic_store_release_i32(i32 addrspace(4)* %ptr) nounwind { + store atomic i32 9, i32 addrspace(4)* %ptr release, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_store_seq_cst_i32( +; HSAIL: atomicnoret_st_screl_system_b32 [{{\$s[0-9]+}}], 9; +; HSAIL: memfence_scacq_system; +define void @flat_atomic_store_seq_cst_i32(i32 addrspace(4)* %ptr) nounwind { + store atomic i32 9, i32 addrspace(4)* %ptr seq_cst, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &flat_atomic_store_monotonic_i32( +; HSAIL: atomicnoret_st_rlx_system_b32 [{{\$s[0-9]+}}], {{\$s[0-9]+}}; +define void @flat_atomic_store_monotonic_i32(i32 addrspace(4)* %ptr, i32 %foo) nounwind { + store atomic i32 %foo, i32 addrspace(4)* %ptr monotonic, align 4 + ret void +} Index: test/CodeGen/HSAIL/fma.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fma.ll @@ -0,0 +1,120 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.fma.f32(float, float, float) #0 +declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #0 +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0 + +declare double @llvm.fma.f64(double, double, double) #0 +declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 +declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) #0 + +declare float @llvm.HSAIL.fma.f32(float, float, float) #0 +declare double @llvm.HSAIL.fma.f64(double, double, double) #0 + + +; HSAIL-LABEL: {{^}}prog function &fma_f32( +; HSAIL: fma_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1, + float addrspace(1)* %in2, float addrspace(1)* %in3) #1 { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = load float, float addrspace(1)* %in3 + %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2) #0 + store float %r3, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fma_v2f32( +; HSAIL: fma_ftz_f32 +; HSAIL: fma_ftz_f32 +define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, + <2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) #1 { + %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1 + %r1 = load <2 x float>, <2 x float> addrspace(1)* %in2 + %r2 = load <2 x float>, <2 x float> addrspace(1)* %in3 + %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2) #0 + store <2 x float> %r3, <2 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fma_v4f32( +; HSAIL: fma_ftz_f32 +; HSAIL: fma_ftz_f32 +; HSAIL: fma_ftz_f32 +; HSAIL: fma_ftz_f32 +define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1, + <4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) #1 { + %r0 = load <4 x float>, <4 x float> addrspace(1)* 
%in1 + %r1 = load <4 x float>, <4 x float> addrspace(1)* %in2 + %r2 = load <4 x float>, <4 x float> addrspace(1)* %in3 + %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2) #0 + store <4 x float> %r3, <4 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fma_f64( +; HSAIL: fma_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2, double addrspace(1)* %in3) #1 { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = load double, double addrspace(1)* %in3 + %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2) #0 + store double %r3, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fma_v2f64( +; HSAIL: fma_f64 +; HSAIL: fma_f64 +define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, + <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) #1 { + %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1 + %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2 + %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3 + %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2) #0 + store <2 x double> %r3, <2 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fma_v4f64( +; HSAIL: fma_f64 +; HSAIL: fma_f64 +; HSAIL: fma_f64 +; HSAIL: fma_f64 +define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1, + <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) #1 { + %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1 + %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2 + %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3 + %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2) #0 + store <4 x double> %r3, <4 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_fma_f32( +; HSAIL: fma_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @legacy_hsail_fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1, + float addrspace(1)* %in2, float addrspace(1)* %in3) #1 { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = load float, float addrspace(1)* %in3 + %r3 = tail call float @llvm.HSAIL.fma.f32(float %r0, float %r1, float %r2) #0 + store float %r3, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_fma_f64( +; HSAIL: fma_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @legacy_hsail_fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2, double addrspace(1)* %in3) #1 { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = load double, double addrspace(1)* %in3 + %r3 = tail call double @llvm.HSAIL.fma.f64(double %r0, double %r1, double %r2) #0 + store double %r3, double addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/fmaxnum.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fmaxnum.ll @@ -0,0 +1,186 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float 
@llvm.maxnum.f32(float, float) #0 +declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0 +declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0 +declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #0 +declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0 + +declare double @llvm.maxnum.f64(double, double) #0 +declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0 +declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) #0 +declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) #0 +declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0 + +declare float @llvm.HSAIL.max.f32(float, float) #0 +declare double @llvm.HSAIL.max.f64(double, double) #0 + +declare float @llvm.HSAIL.gcn.max.f32(float, float) #0 +declare double @llvm.HSAIL.gcn.max.f64(double, double) #0 + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_f32( +; HSAIL: max_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @test_fmaxnum_f32(float addrspace(1)* %out, float %a, float %b) #1 { + %val = call float @llvm.maxnum.f32(float %a, float %b) #0 + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_v2f32( +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +define void @test_fmaxnum_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 { + %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0 + store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_v4f32( +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +define void @test_fmaxnum_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #1 { + %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0 + store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_v8f32( +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +define void @test_fmaxnum_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #1 { + %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0 + store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_v16f32( +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +; HSAIL: max_ftz_f32 +define void @test_fmaxnum_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #1 { + %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0 + store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_f64( +; HSAIL: max_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @test_fmaxnum_f64(double addrspace(1)* %out, double %a, double %b) #1 { + %val = call double @llvm.maxnum.f64(double %a, double %b) #0 + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function 
&test_fmaxnum_v2f64( +; HSAIL: max_f64 +; HSAIL: max_f64 +define void @test_fmaxnum_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) #1 { + %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0 + store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_v4f64( +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +define void @test_fmaxnum_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) #1 { + %val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0 + store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_v8f64( +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +define void @test_fmaxnum_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) #1 { + %val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0 + store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fmaxnum_v16f64( +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +; HSAIL: max_f64 +define void @test_fmaxnum_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) #1 { + %val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0 + store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_hsail_max_f32( +; HSAIL: max_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @test_legacy_hsail_max_f32(float addrspace(1)* %out, float %a, float %b) #1 { + %val = call float @llvm.HSAIL.max.f32(float %a, float %b) #0 + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_hsail_max_f64( +; HSAIL: max_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @test_legacy_hsail_max_f64(double addrspace(1)* %out, double %a, double %b) #1 { + %val = call double @llvm.HSAIL.max.f64(double %a, double %b) #0 + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_gcn_max_f32( +; HSAIL: gcn_max_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @test_legacy_gcn_max_f32(float addrspace(1)* %out, float %a, float %b) #1 { + %val = call float @llvm.HSAIL.gcn.max.f32(float %a, float %b) #0 + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_gcn_max_f64( +; HSAIL: gcn_max_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @test_legacy_gcn_max_f64(double addrspace(1)* %out, double %a, double %b) #1 { + %val = call double @llvm.HSAIL.gcn.max.f64(double %a, double %b) #0 + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/fminnum.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fminnum.ll @@ -0,0 +1,186 @@ +; RUN: llc -march=hsail -mattr=+gcn 
-verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.minnum.f32(float, float) #0 +declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0 +declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #0 +declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #0 +declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0 + +declare double @llvm.minnum.f64(double, double) #0 +declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0 +declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) #0 +declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0 +declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0 + +declare float @llvm.HSAIL.min.f32(float, float) #0 +declare double @llvm.HSAIL.min.f64(double, double) #0 + +declare float @llvm.HSAIL.gcn.min.f32(float, float) #0 +declare double @llvm.HSAIL.gcn.min.f64(double, double) #0 + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_f32( +; HSAIL: min_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @test_fminnum_f32(float addrspace(1)* %out, float %a, float %b) #1 { + %val = call float @llvm.minnum.f32(float %a, float %b) #0 + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_v2f32( +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +define void @test_fminnum_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 { + %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0 + store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_v4f32( +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +define void @test_fminnum_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #1 { + %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0 + store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_v8f32( +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +define void @test_fminnum_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #1 { + %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0 + store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_v16f32( +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +; HSAIL: min_ftz_f32 +define void @test_fminnum_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #1 { + %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0 + store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_f64( +; HSAIL: min_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @test_fminnum_f64(double addrspace(1)* %out, double %a, double %b) #1 { + %val = call double @llvm.minnum.f64(double %a, double %b) #0 + store double %val, double 
addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_v2f64( +; HSAIL: min_f64 +; HSAIL: min_f64 +define void @test_fminnum_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) #1 { + %val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0 + store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_v4f64( +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +define void @test_fminnum_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) #1 { + %val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0 + store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_v8f64( +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +define void @test_fminnum_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) #1 { + %val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0 + store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_fminnum_v16f64( +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +; HSAIL: min_f64 +define void @test_fminnum_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) #1 { + %val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0 + store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_hsail_min_f32( +; HSAIL: min_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @test_legacy_hsail_min_f32(float addrspace(1)* %out, float %a, float %b) #1 { + %val = call float @llvm.HSAIL.min.f32(float %a, float %b) #0 + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_hsail_min_f64( +; HSAIL: min_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @test_legacy_hsail_min_f64(double addrspace(1)* %out, double %a, double %b) #1 { + %val = call double @llvm.HSAIL.min.f64(double %a, double %b) #0 + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_gcn_min_f32( +; HSAIL: gcn_min_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @test_legacy_gcn_min_f32(float addrspace(1)* %out, float %a, float %b) #1 { + %val = call float @llvm.HSAIL.gcn.min.f32(float %a, float %b) #0 + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_gcn_min_f64( +; HSAIL: gcn_min_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @test_legacy_gcn_min_f64(double addrspace(1)* %out, double %a, double %b) #1 { + %val = call double @llvm.HSAIL.gcn.min.f64(double %a, double %b) #0 + store double %val, double addrspace(1)* %out, align 8 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/fmuladd.ll =================================================================== --- /dev/null +++ 
test/CodeGen/HSAIL/fmuladd.ll @@ -0,0 +1,30 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare float @llvm.fmuladd.f32(float, float, float) +declare double @llvm.fmuladd.f64(double, double, double) + +; FUNC-LABEL: {{^}}prog function &fmuladd_f32 +; HSAIL: mul_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1, + float addrspace(1)* %in2, float addrspace(1)* %in3) { + %r0 = load float, float addrspace(1)* %in1 + %r1 = load float, float addrspace(1)* %in2 + %r2 = load float, float addrspace(1)* %in3 + %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2) + store float %r3, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fmuladd_f64 +; HSAIL: mul_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2, double addrspace(1)* %in3) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = load double, double addrspace(1)* %in3 + %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2) + store double %r3, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fneg.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fneg.ll @@ -0,0 +1,96 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fneg_f32 +; HSAIL: neg_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fneg_f32(float addrspace(1)* %out, float %in) { + %fneg = fsub float -0.000000e+00, %in + store float %fneg, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_v2f32 +; HSAIL: neg_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: neg_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; + +define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) { + %fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in + store <2 x float> %fneg, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_v4f32 +; HSAIL: neg_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: neg_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: neg_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: neg_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) { + %fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in + store <4 x float> %fneg, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_free_f32 +; HSAIL: sub_ftz_f32 {{\$s[0-9]+}}, 0F00000000, {{\$s[0-9]+}}; +define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) { + %bc = bitcast i32 %in to float + %fsub = fsub float 0.0, %bc + store float %fsub, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_fold_f32 +; HSAIL: neg_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fneg_fold_f32(float addrspace(1)* %out, float %in) { + %fsub = fsub float -0.0, %in + %fmul = fmul float %fsub, %in + store float %fmul, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_f64 +; HSAIL: neg_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fneg_f64(double addrspace(1)* %out, double %in) { + %fneg = fsub double -0.000000e+00, %in + 
store double %fneg, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_v2f64 +; HSAIL: neg_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: neg_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) { + %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in + store <2 x double> %fneg, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_v4f64 +; HSAIL: neg_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: neg_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: neg_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: neg_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) { + %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in + store <4 x double> %fneg, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_free_f64 +; HSAIL: sub_f64 {{\$d[0-9]+}}, 0D0000000000000000, {{\$d[0-9]+}}; +define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) { + %bc = bitcast i64 %in to double + %fsub = fsub double 0.0, %bc + store double %fsub, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fneg_fold_f64 +; HSAIL: neg_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: mul_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fneg_fold_f64(double addrspace(1)* %out, double %in) { + %fsub = fsub double -0.0, %in + %fmul = fmul double %fsub, %in + store double %fmul, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fp64_to_sint.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fp64_to_sint.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fp64_to_sint +; HSAIL: ld_arg_align(8)_f64 {{\$d[0-9]+}}, [%in]; +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @fp64_to_sint(i32 addrspace(1)* %out, double %in) { + %result = fptosi double %in to i32 + store i32 %result, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fp_to_sint.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fp_to_sint.ll @@ -0,0 +1,115 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f32_i32 +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_sint_f32_i32 (i32 addrspace(1)* %out, float %in) { + %conv = fptosi float %in to i32 + store i32 %conv, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f32_v2i32 +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_sint_f32_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { + %result = fptosi <2 x float> %in to <2 x i32> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f32_v4i32 +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_s32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_sint_f32_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float>, <4 x float> addrspace(1) * %in + %result = fptosi <4 x float> %value to <4 x 
i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f32_i64 +; HSAIL: cvt_ftz_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_sint_f32_i64 (i64 addrspace(1)* %out, float %in) { + %conv = fptosi float %in to i64 + store i64 %conv, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f32_v2i64 +; HSAIL: cvt_ftz_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_sint_f32_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { + %conv = fptosi <2 x float> %x to <2 x i64> + store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f32_v4i64 +; HSAIL: cvt_ftz_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_s64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_sint_f32_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { + %conv = fptosi <4 x float> %x to <4 x i64> + store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f64_i32 +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_sint_f64_i32 (i32 addrspace(1)* %out, double %in) { + %conv = fptosi double %in to i32 + store i32 %conv, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f64_v2i32 +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_sint_f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) { + %result = fptosi <2 x double> %in to <2 x i32> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f64_v4i32 +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_s32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_sint_f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> addrspace(1)* %in) { + %value = load <4 x double>, <4 x double> addrspace(1) * %in + %result = fptosi <4 x double> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f64_i64 +; HSAIL: cvt_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_sint_f64_i64 (i64 addrspace(1)* %out, double %in) { + %conv = fptosi double %in to i64 + store i64 %conv, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f64_v2i64 +; HSAIL: cvt_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_sint_f64_v2i64(<2 x i64> addrspace(1)* %out, <2 x double> %x) { + %conv = fptosi <2 x double> %x to <2 x i64> + store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_sint_f64_v4i64 +; HSAIL: cvt_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_s64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_sint_f64_v4i64(<4 x i64> addrspace(1)* %out, <4 x double> %x) { + %conv = fptosi <4 x double> %x to <4 x i64> + store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fp_to_uint.ll 
=================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fp_to_uint.ll @@ -0,0 +1,115 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f32_i32 +; HSAIL: cvt_ftz_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_uint_f32_i32 (i32 addrspace(1)* %out, float %in) { + %conv = fptoui float %in to i32 + store i32 %conv, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f32_v2i32 +; HSAIL: cvt_ftz_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_uint_f32_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) { + %result = fptoui <2 x float> %in to <2 x i32> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f32_v4i32 +; HSAIL: cvt_ftz_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_u32_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_uint_f32_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %value = load <4 x float>, <4 x float> addrspace(1) * %in + %result = fptoui <4 x float> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f32_i64 +; HSAIL: cvt_ftz_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_uint_f32_i64 (i64 addrspace(1)* %out, float %in) { + %conv = fptoui float %in to i64 + store i64 %conv, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f32_v2i64 +; HSAIL: cvt_ftz_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_uint_f32_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) { + %conv = fptoui <2 x float> %x to <2 x i64> + store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f32_v4i64 +; HSAIL: cvt_ftz_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_u64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @fp_to_uint_f32_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) { + %conv = fptoui <4 x float> %x to <4 x i64> + store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f64_i32 +; HSAIL: cvt_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_uint_f64_i32 (i32 addrspace(1)* %out, double %in) { + %conv = fptoui double %in to i32 + store i32 %conv, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f64_v2i32 +; HSAIL: cvt_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_uint_f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) { + %result = fptoui <2 x double> %in to <2 x i32> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f64_v4i32 +; HSAIL: cvt_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_uint_f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> 
addrspace(1)* %in) { + %value = load <4 x double>, <4 x double> addrspace(1) * %in + %result = fptoui <4 x double> %value to <4 x i32> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f64_i64 +; HSAIL: cvt_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_uint_f64_i64 (i64 addrspace(1)* %out, double %in) { + %conv = fptoui double %in to i64 + store i64 %conv, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f64_v2i64 +; HSAIL: cvt_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_uint_f64_v2i64(<2 x i64> addrspace(1)* %out, <2 x double> %x) { + %conv = fptoui <2 x double> %x to <2 x i64> + store <2 x i64> %conv, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fp_to_uint_f64_v4i64 +; HSAIL: cvt_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u64_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fp_to_uint_f64_v4i64(<4 x i64> addrspace(1)* %out, <4 x double> %x) { + %conv = fptoui <4 x double> %x to <4 x i64> + store <4 x i64> %conv, <4 x i64> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fpext.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fpext.ll @@ -0,0 +1,40 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fpext_f32_to_f64( +; HSAIL: cvt_ftz_f64_f32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @fpext_f32_to_f64(double addrspace(1)* %out, float %in) { + %result = fpext float %in to double + store double %result, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fpext_v2f32_to_v2f64( +; HSAIL: cvt_ftz_f64_f32 +; HSAIL: cvt_ftz_f64_f32 +define void @fpext_v2f32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x float> %in) { + %result = fpext <2 x float> %in to <2 x double> + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fpext_v4f32_to_v4f64( +; HSAIL: cvt_ftz_f64_f32 +; HSAIL: cvt_ftz_f64_f32 +; HSAIL: cvt_ftz_f64_f32 +; HSAIL: cvt_ftz_f64_f32 +define void @fpext_v4f32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x float> %in) { + %result = fpext <4 x float> %in to <4 x double> + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fpext_v8f32_to_v8f64( +; HSAIL: cvt_ftz_f64_f32 +; HSAIL: cvt_ftz_f64_f32 +; HSAIL: cvt_ftz_f64_f32 +; HSAIL: cvt_ftz_f64_f32 +define void @fpext_v8f32_to_v8f64(<8 x double> addrspace(1)* %out, <8 x float> %in) { + %result = fpext <8 x float> %in to <8 x double> + store <8 x double> %result, <8 x double> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fptrunc.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fptrunc.ll @@ -0,0 +1,46 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &fptrunc_f64_to_f32( +; HSAIL: ld_arg_align(8)_f64 {{\$d[0-9]+}}, [%in]; +; HSAIL-NEXT: cvt_ftz_f32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @fptrunc_f64_to_f32(float addrspace(1)* %out, double %in) { + %result = fptrunc double %in to float + store float %result, float addrspace(1)* %out + ret 
void +} + +; FUNC-LABEL: {{^}}prog function &fptrunc_v2f64_to_v2f32( +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +define void @fptrunc_v2f64_to_v2f32(<2 x float> addrspace(1)* %out, <2 x double> %in) { + %result = fptrunc <2 x double> %in to <2 x float> + store <2 x float> %result, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fptrunc_v4f64_to_v4f32( +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +define void @fptrunc_v4f64_to_v4f32(<4 x float> addrspace(1)* %out, <4 x double> %in) { + %result = fptrunc <4 x double> %in to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &fptrunc_v8f64_to_v8f32( +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +; HSAIL: cvt_ftz_f32_f64 +define void @fptrunc_v8f64_to_v8f32(<8 x float> addrspace(1)* %out, <8 x double> %in) { + %result = fptrunc <8 x double> %in to <8 x float> + store <8 x float> %result, <8 x float> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/fsub.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/fsub.ll @@ -0,0 +1,131 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &v_fsub_f32( +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) { + %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1 + %a = load float, float addrspace(1)* %in, align 4 + %b = load float, float addrspace(1)* %b_ptr, align 4 + %result = fsub float %a, %b + store float %result, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_fsub_f32( +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[0-9]+}}, {{\$s[[0-9]+}}; +define void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) { + %sub = fsub float %a, %b + store float %sub, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fsub_v2f32( +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) { + %sub = fsub <2 x float> %a, %b + store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fsub_v4f32( +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1 + %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16 + %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16 + %result = fsub <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_fsub_v4f32( +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +; HSAIL: 
sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +; HSAIL: sub_ftz_f32 {{\$s[[0-9]+}}, {{\$s[[0-9]+}}, {{\$s[[0-9]+}}; +define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) { + %result = fsub <4 x float> %a, %b + store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fsub_f64( +; HSAIL: sub_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fsub double %r0, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_fsub_f64( +; HSAIL: sub_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double %a, %b + store double %sub, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_fsub_imm_f64( +; HSAIL: sub_f64 {{\$d[0-9]+}}, 0D4010000000000000, {{\$d[0-9]+}} +define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double 4.0, %a + store double %sub, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_fsub_imm_inf64( +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 0Dc010000000000000 +define void @s_fsub_imm_inf64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double %a, 4.0 + store double %sub, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_fsub_self_f64( +; HSAIL: sub_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @s_fsub_self_f64(double addrspace(1)* %out, double %a) { + %sub = fsub double %a, %a + store double %sub, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fsub_v2f64( +; HSAIL: sub_f64 +; HSAIL: sub_f64 +define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) { + %sub = fsub <2 x double> %a, %b + store <2 x double> %sub, <2 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fsub_v4f64( +; HSAIL: sub_f64 +; HSAIL: sub_f64 +; HSAIL: sub_f64 +; HSAIL: sub_f64 +define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1 + %a = load <4 x double>, <4 x double> addrspace(1)* %in + %b = load <4 x double>, <4 x double> addrspace(1)* %b_ptr + %result = fsub <4 x double> %a, %b + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_fsub_v4f64( +; HSAIL: sub_f64 +; HSAIL: sub_f64 +; HSAIL: sub_f64 +; HSAIL: sub_f64 +define void @s_fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) { + %result = fsub <4 x double> %a, %b + store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16 + ret void +} Index: test/CodeGen/HSAIL/ftrunc.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ftrunc.ll @@ -0,0 +1,188 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &ftrunc_ftz_f32( +; HSAIL: trunc_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @ftrunc_ftz_f32(float addrspace(1)* %out, float %x) { + %y = call float @llvm.trunc.f32(float %x) #0 + store float %y, float addrspace(1)* %out + ret void +} + +; 
HSAIL-LABEL: {{^}}prog function &ftrunc_v2f32( +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) { + %y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) #0 + store <2 x float> %y, <2 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v3f32( +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) { + %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) #0 + store <3 x float> %y, <3 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v4f32( +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) { + %y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) #0 + store <4 x float> %y, <4 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v8f32( +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) { + %y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) #0 + store <8 x float> %y, <8 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v16f32( +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +; HSAIL: trunc_ftz_f32 +define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) { + %y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) #0 + store <16 x float> %y, <16 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_f64( +; HSAIL: trunc_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @ftrunc_f64(double addrspace(1)* %out, double %x) { + %y = call double @llvm.trunc.f64(double %x) #0 + store double %y, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v2f64( +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) { + %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) #0 + store <2 x double> %y, <2 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v3f64( +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) { + %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) #0 + store <3 x double> %y, <3 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v4f64( +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) { + %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) #0 + store <4 x double> %y, <4 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v8f64( +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 
+define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) { + %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) #0 + store <8 x double> %y, <8 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ftrunc_v16f64( +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +; HSAIL: trunc_f64 +define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) { + %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) #0 + store <16 x double> %y, <16 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_trunc_f32( +; HSAIL: trunc_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @legacy_hsail_trunc_f32(float addrspace(1)* %out, float %in) #1 { + %tmp = call float @llvm.HSAIL.trunc.f32(float %in) #0 + store float %tmp, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_trunc_f64( +; HSAIL: trunc_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @legacy_hsail_trunc_f64(double addrspace(1)* %out, double %in) #1 { + %tmp = call double @llvm.HSAIL.trunc.f64(double %in) #0 + store double %tmp, double addrspace(1)* %out + ret void +} + +declare float @llvm.trunc.f32(float) #0 +declare <2 x float> @llvm.trunc.v2f32(<2 x float>) #0 +declare <3 x float> @llvm.trunc.v3f32(<3 x float>) #0 +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) #0 +declare <8 x float> @llvm.trunc.v8f32(<8 x float>) #0 +declare <16 x float> @llvm.trunc.v16f32(<16 x float>) #0 + +declare double @llvm.trunc.f64(double) #0 +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) #0 +declare <3 x double> @llvm.trunc.v3f64(<3 x double>) #0 +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) #0 +declare <8 x double> @llvm.trunc.v8f64(<8 x double>) #0 +declare <16 x double> @llvm.trunc.v16f64(<16 x double>) #0 + +declare float @llvm.HSAIL.trunc.f32(float) #0 +declare double @llvm.HSAIL.trunc.f64(double) #0 + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/function-args.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/function-args.ll @@ -0,0 +1,657 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; HSAIL: decl prog function &i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u8 %in); + +; HSAIL: decl prog function &i8_zext_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u8 %in); + +; HSAIL: decl prog function &i8_sext_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_s8 %in); + +; HSAIL: decl prog function &i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u16 %in); + +; HSAIL: decl prog function &i16_zext_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u16 %in); + +; HSAIL: decl prog function &i16_sext_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_s16 %in); + +; HSAIL: decl prog function &i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u32 %in); + +; HSAIL: decl prog function &f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_f32 %in); + +; HSAIL: decl prog function &v2i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(2) arg_u8 %in[2]); + +; HSAIL: decl prog function &v2i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(4) arg_u16 %in[2]); + +; HSAIL: decl 
prog function &v2i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_u32 %in[2]); + +; HSAIL: decl prog function &v2f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_f32 %in[2]); + +; HSAIL: decl prog function &v3i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(4) arg_u8 %in[4]); + +; HSAIL: decl prog function &v3i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_u16 %in[4]); + +; HSAIL: decl prog function &v3i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u32 %in[4]); + +; HSAIL: decl prog function &v3f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_f32 %in[4]); + +; HSAIL: decl prog function &v4i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(4) arg_u8 %in[4]); + +; HSAIL: decl prog function &v4i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_u16 %in[4]); + +; HSAIL: decl prog function &v4i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u32 %in[4]); + +; HSAIL: decl prog function &v4f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_f32 %in[4]); + +; HSAIL: decl prog function &v8i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_u8 %in[8]); + +; HSAIL: decl prog function &v8i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u16 %in[8]); + +; HSAIL: decl prog function &v8i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_u32 %in[8]); + +; HSAIL: decl prog function &v8f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_f32 %in[8]); + +; HSAIL: decl prog function &v16i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u8 %in[16]); + +; HSAIL: decl prog function &v16i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_u16 %in[16]); + +; HSAIL: decl prog function &v16i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(64) arg_u32 %in[16]); + +; HSAIL: decl prog function &v16f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(64) arg_f32 %in[16]); + +; HSAIL: decl prog function &i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u64 %a); + +; HSAIL: decl prog function &v2i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u64 %in[2]); + +; HSAIL: decl prog function &v3i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_u64 %in[4]); + +; HSAIL: decl prog function &v4i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_u64 %in[4]); + +; HSAIL: decl prog function &v8i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(64) arg_u64 %in[8]); + +; HSAIL: decl prog function &array_4xi32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u32 %x[4]); + +; HSAIL: decl prog function &struct_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(4) arg_u8 %x[8]); + +; HSAIL: decl prog function &struct_byval_arg()( +; HSAIL: arg_u32 %out, +; HSAIL: arg_u32 %x); + + +; FUNC-LABEL: {{^}}prog function &i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u8 %in) +; HSAIL-NEXT: { +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 255; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { + %tmp0 = zext i8 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i8_zext_arg()({{$}} +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u8 %in) +; HSAIL-NEXT: { +; HSAIL: ld_arg_align(4)_u32 
{{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in]; +define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { + %tmp0 = zext i8 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i8_sext_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_s8 %in) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_s8 {{\$s[0-9]+}}, [%in]; +define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { + %tmp0 = sext i8 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u16 %in) +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 65535; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { + %tmp0 = zext i16 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i16_zext_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u16 %in) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in]; +define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { + %tmp0 = zext i16 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i16_sext_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_s16 %in) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(2)_s16 {{\$s[0-9]+}}, [%in]; +define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { + %tmp0 = sext i16 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u32 %in) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { + store i32 %in, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_f32 %in) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in]; +define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { + store float %in, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v2i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(2) arg_u8 %in[2]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][1]; +define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { + store <2 x i8> %in, <2 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v2i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(4) arg_u16 %in[2]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][2]; +define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { + store <2 x i16> %in, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v2i32_arg()({{$}} +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_u32 %in[2]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, 
[%out]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][4]; +define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { + store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v2f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_f32 %in[2]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(8)_f32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in][4]; +define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { + store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v3i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(4) arg_u8 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(4)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in][2]; +define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { + store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v3i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_u16 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(8)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: shl_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 16; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 65535; +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { + store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v3i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u32 %in[4]) + +; HSAIL: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: ld_arg_align(16)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in][8]; + +; HSAIL-DAG: st_global_align(4)_u64 {{\$d[0-9]+}}, {{\[}}[[OUT]]{{\]}}; +; HSAIL-DAG: st_global_align(4)_u32 {{\$s[0-9]+}}, {{\[}}[[OUT]]+8]; + +; XHSAIL: pack_u32x2_u32 {{\$d[0-9]+}}, _u32x2(0,0), {{\$s[0-9]+}}, 1; +; XHSAIL: ld_arg_align(16)_u32 {{\$s[0-9]+}}, [%in]; +; XHSAIL: cvt_u64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; XHSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; + +; HSAIL: ret; +define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { + store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v3f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_f32 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(16)_f32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(8)_f32 {{\$s[0-9]+}}, [%in][8]; + +; XHSAIL: pack_u32x2_u32 {{\$d[0-9]+}}, _u32x2(0,0), {{\$s[0-9]+}}, 1; +; XHSAIL: cvt_u64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; XHSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; + +; HSAIL: ret; +define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { + store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v4i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; 
HSAIL-NEXT: align(4) arg_u8 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(4)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in][2]; +define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { + store <4 x i8> %in, <4 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v4i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_u16 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(8)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][6]; +define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { + store <4 x i16> %in, <4 x i16> addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}prog function &v4i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u32 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(16)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][12]; +define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { + store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v4f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_f32 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(16)_f32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(8)_f32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in][12]; +define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { + store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 + ret void +} + + +; FUNC-LABEL: {{^}}prog function &v8i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(8) arg_u8 %in[8]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(8)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][1]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][3]; +; HSAIL: ld_arg_align(4)_u8 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][5]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in][6]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][7]; +; HSAIL: ret; +define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { + store <8 x i8> %in, <8 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v8i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u16 %in[8]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(16)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][6]; +; HSAIL: ld_arg_align(8)_u16 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][10]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][14]; +define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { + store <8 x i16> %in, <8 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v8i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_u32 
%in[8]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(32)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_arg_align(16)_u32 {{\$s[0-9]+}}, [%in][16]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][20]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in][24]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][28]; +define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { + store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v8f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_f32 %in[8]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(32)_f32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(8)_f32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_arg_align(16)_f32 {{\$s[0-9]+}}, [%in][16]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in][20]; +; HSAIL: ld_arg_align(8)_f32 {{\$s[0-9]+}}, [%in][24]; +; HSAIL: ld_arg_align(4)_f32 {{\$s[0-9]+}}, [%in][28]; +define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { + store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v16i8_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u8 %in[16]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(16)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][3]; +; HSAIL: ld_arg_align(4)_u8 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][5]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in][6]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][7]; +; HSAIL: ld_arg_align(8)_u8 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][9]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in][10]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][11]; +; HSAIL: ld_arg_align(4)_u8 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][13]; +; HSAIL: ld_arg_align(2)_u8 {{\$s[0-9]+}}, [%in][14]; +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%in][15]; +define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { + store <16 x i8> %in, <16 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v16i16_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_u16 %in[16]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(32)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][6]; +; HSAIL: ld_arg_align(8)_u16 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][10]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][14]; +; HSAIL: ld_arg_align(16)_u16 {{\$s[0-9]+}}, [%in][16]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][18]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in][20]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][22]; +; HSAIL: ld_arg_align(8)_u16 {{\$s[0-9]+}}, [%in][24]; +; HSAIL: ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][26]; +; HSAIL: ld_arg_align(4)_u16 {{\$s[0-9]+}}, [%in][28]; +; HSAIL: 
ld_arg_align(2)_u16 {{\$s[0-9]+}}, [%in][30]; +define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { + store <16 x i16> %in, <16 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v16i32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(64) arg_u32 %in[16]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(64)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_arg_align(16)_u32 {{\$s[0-9]+}}, [%in][16]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][20]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in][24]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][28]; +; HSAIL: ld_arg_align(32)_u32 {{\$s[0-9]+}}, [%in][32]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][36]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in][40]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][44]; +; HSAIL: ld_arg_align(16)_u32 {{\$s[0-9]+}}, [%in][48]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][52]; +; HSAIL: ld_arg_align(8)_u32 {{\$s[0-9]+}}, [%in][56]; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in][60]; +; HSAIL: ret; +define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { + store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v16f32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(64) arg_f32 %in[16]) +define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { + store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u64 %a) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%a]; +define void @i64_arg(i64 addrspace(1)* %out, i64 %a) nounwind { + store i64 %a, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v2i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(16) arg_u64 %in[2]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(16)_u64 {{\$d[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%in][8]; +define void @v2i64_arg(<2 x i64> addrspace(1)* nocapture %out, <2 x i64> %in) nounwind { + store <2 x i64> %in, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v3i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_u64 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(32)_u64 {{\$d[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(16)_u64 {{\$d[0-9]+}}, [%in][16]; +define void @v3i64_arg(<3 x i64> addrspace(1)* nocapture %out, <3 x i64> %in) nounwind { + store <3 x i64> %in, <3 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v4i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(32) arg_u64 %in[4]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(32)_u64 {{\$d[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(16)_u64 {{\$d[0-9]+}}, [%in][16]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%in][24]; +define void @v4i64_arg(<4 x i64> addrspace(1)* nocapture %out, <4 x i64> %in) nounwind { + store <4 x i64> %in, <4 x i64> addrspace(1)* %out + ret void 
+} + +; FUNC-LABEL: {{^}}prog function &v8i64_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(64) arg_u64 %in[8]) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_arg_align(64)_u64 {{\$d[0-9]+}}, [%in]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%in][8]; +; HSAIL: ld_arg_align(16)_u64 {{\$d[0-9]+}}, [%in][16]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%in][24]; +; HSAIL: ld_arg_align(32)_u64 {{\$d[0-9]+}}, [%in][32]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%in][40]; +; HSAIL: ld_arg_align(16)_u64 {{\$d[0-9]+}}, [%in][48]; +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%in][56]; +define void @v8i64_arg(<8 x i64> addrspace(1)* nocapture %out, <8 x i64> %in) nounwind { + store <8 x i64> %in, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &array_4xi32_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u32 %x[4]) +; HSAIL-NEXT: { +; HSAIL-DAG: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%x]; +; HSAIL-DAG: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%x][4]; +; HSAIL-DAG: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%x][8]; +; HSAIL-DAG: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%x][12]; +; HSAIL: ret; +define void @array_4xi32_arg([4 x i32] addrspace(1)* %out, [4 x i32] %x) nounwind { + store [4 x i32] %x, [4 x i32] addrspace(1)* %out + ret void +} + +%struct.i32pair = type { i32, i32 } + +; FUNC-LABEL: {{^}}prog function &struct_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: align(4) arg_u8 %x[8]) +; HSAIL-DAG: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%x]; +; HSAIL-DAG: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%x][4]; +; HSAIL: ret; +define void @struct_arg(%struct.i32pair addrspace(1)* %out, %struct.i32pair %x) { + store %struct.i32pair %x, %struct.i32pair addrspace(1)* %out + ret void +} + +; FIXME: This is wrong +%struct.foo = type { i32, i16, [3 x i32] } + +; FUNC-LABEL: {{^}}prog function &struct_byval_arg()( +; HSAIL-NEXT: arg_u32 %out, +; HSAIL-NEXT: arg_u32 %x) +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%x]; +; HSAIL: ld_private_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+12]; +; HSAIL: ret; +define void @struct_byval_arg(i32 addrspace(1)* %out, %struct.foo* byval %x) { + %ingep = getelementptr %struct.foo, %struct.foo* %x, i32 0, i32 2, i32 1 + %load = load i32, i32* %ingep + store i32 %load, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/generic-address-bug9899.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/generic-address-bug9899.ll @@ -0,0 +1,62 @@ +; XFAIL: * +; AddrSpaceCast optimization. Segment load/store should be generated after optimization. 
+; +; RUN: llc -march=hsail-64 -filetype=asm -o - %s | FileCheck %s + +target triple = "hsail64-pc-unknown-amdopencl" + + +; CHECK: ld_v2_global +; CHECK: st_v2_global +define spir_kernel void @__OpenCL_sample_test_kernel(i32 addrspace(1)* nocapture %sourceA, i32 addrspace(1)* nocapture %destValues) nounwind { +entry: + %0 = tail call spir_func i64 @__hsail_ld_kernarg_u64(i32 0) nounwind readnone + %1 = tail call spir_func i32 @__hsail_get_global_id(i32 0) nounwind readnone + %2 = zext i32 %1 to i64 + %3 = add i64 %2, %0 + %sext = shl i64 %3, 32 + %4 = bitcast i32 addrspace(1)* %destValues to i32 addrspace(4)* + %5 = bitcast i32 addrspace(1)* %sourceA to i32 addrspace(4)* + %6 = ashr exact i64 %sext, 31 + %7 = getelementptr inbounds i32, i32 addrspace(4)* %5, i64 %6 + %arrayidx_v4 = bitcast i32 addrspace(4)* %7 to <2 x i32> addrspace(4)* + %vecload = load <2 x i32>, <2 x i32> addrspace(4)* %arrayidx_v4, align 4 + %8 = extractelement <2 x i32> %vecload, i32 0 + %9 = extractelement <2 x i32> %vecload, i32 1 + %10 = tail call spir_func i32 @__hsail_lastbit_u32(i32 %8) nounwind readonly + %11 = icmp eq i32 %8, 0 + %12 = select i1 %11, i32 32, i32 %10 + %13 = tail call spir_func i32 @__hsail_lastbit_u32(i32 %9) nounwind readonly + %14 = icmp eq i32 %9, 0 + %15 = select i1 %14, i32 32, i32 %13 + %16 = getelementptr inbounds i32, i32 addrspace(4)* %4, i64 %6 + %17 = insertelement <2 x i32> undef, i32 %12, i32 0 + %18 = insertelement <2 x i32> %17, i32 %15, i32 1 + %arrayidx_v41 = bitcast i32 addrspace(4)* %16 to <2 x i32> addrspace(4)* + store <2 x i32> %18, <2 x i32> addrspace(4)* %arrayidx_v41, align 4 + ret void +} + +declare spir_func i32 @__hsail_lastbit_u32(i32) nounwind readonly + +declare spir_func i64 @__hsail_ld_kernarg_u64(i32) nounwind readnone + +declare spir_func i32 @__hsail_get_global_id(i32) nounwind readnone + +!opencl.kernels = !{!0} +!opencl.enable.FP_CONTRACT = !{} +!opencl.spir.version = !{!6} +!opencl.ocl.version = !{!7} +!opencl.used.extensions = !{!8} +!opencl.used.optional.core.features = !{!8} +!opencl.compiler.options = !{!8} + +!0 = metadata !{void (i32 addrspace(1)*, i32 addrspace(1)*)* @__OpenCL_sample_test_kernel, metadata !1, metadata !2, metadata !3, metadata !4, metadata !5} +!1 = metadata !{metadata !"kernel_arg_addr_space", i32 1, i32 1} +!2 = metadata !{metadata !"kernel_arg_access_qual", metadata !"none", metadata !"none"} +!3 = metadata !{metadata !"kernel_arg_type", metadata !"int*", metadata !"int*"} +!4 = metadata !{metadata !"kernel_arg_type_qual", metadata !"", metadata !""} +!5 = metadata !{metadata !"kernel_arg_base_type", metadata !"int*", metadata !"int*"} +!6 = metadata !{i32 1, i32 2} +!7 = metadata !{i32 2, i32 0} +!8 = metadata !{} Index: test/CodeGen/HSAIL/global-atomics.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/global-atomics.ll @@ -0,0 +1,484 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xchg_ret_i32( +; HSAIL: atomic_exch_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xchg_monotonic_ret_i32( +; HSAIL: atomic_exch_global_rlx_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void 
@global_atomic_xchg_monotonic_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(1)* %ptr, i32 4 monotonic + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xchg_acquire_ret_i32( +; HSAIL: atomic_exch_global_scacq_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_xchg_acquire_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(1)* %ptr, i32 4 acquire + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xchg_release_ret_i32( +; HSAIL: atomic_exch_global_screl_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_xchg_release_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(1)* %ptr, i32 4 release + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xchg_acq_rel_ret_i32( +; HSAIL: atomic_exch_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_xchg_acq_rel_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(1)* %ptr, i32 4 acq_rel + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xchg_ret_i32_offset( +; HSAIL: atomic_exch_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw xchg i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_add_ret_i32( +; HSAIL: atomic_add_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_add_ret_i32_offset( +; HSAIL: atomic_add_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_inc_ret_i32( +; HSAIL: atomic_add_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 1; +define void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(1)* %ptr, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_inc_ret_i32_offset( +; HSAIL: atomic_add_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 1; +define void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(1)* %gep, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function 
&global_atomic_sub_ret_i32( +; HSAIL: atomic_sub_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_sub_ret_i32_offset( +; HSAIL: atomic_sub_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_dec_ret_i32( +; HSAIL: atomic_sub_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 1; +define void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(1)* %ptr, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_dec_ret_i32_offset( +; HSAIL: atomic_sub_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 1; +define void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(1)* %gep, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_and_ret_i32( +; HSAIL: atomic_and_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw and i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_and_ret_i32_offset( +; HSAIL: atomic_and_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw and i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_or_ret_i32( +; HSAIL: atomic_or_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw or i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_or_ret_i32_offset( +; HSAIL: atomic_or_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw or i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xor_ret_i32( +; HSAIL: atomic_xor_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw xor i32 addrspace(1)* %ptr, i32 4 seq_cst 
+ store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xor_ret_i32_offset( +; HSAIL: atomic_xor_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw xor i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FIXME: There is no atomic nand instr +; XHSAIL-LABEL: {{^}}prog function &global_atomic_nand_ret_i32(uction, so we somehow need to expand this. +; define void @global_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { +; %result = atomicrmw nand i32 addrspace(1)* %ptr, i32 4 seq_cst +; store i32 %result, i32 addrspace(1)* %out, align 4 +; ret void +; } + +; HSAIL-LABEL: {{^}}prog function &global_atomic_min_ret_i32( +; HSAIL: atomic_min_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw min i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_min_ret_i32_offset( +; HSAIL: atomic_min_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw min i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_max_ret_i32( +; HSAIL: atomic_max_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw max i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_max_ret_i32_offset( +; HSAIL: atomic_max_global_scar_system_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw max i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_umin_ret_i32( +; HSAIL: atomic_min_global_scar_system_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw umin i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_umin_ret_i32_offset( +; HSAIL: atomic_min_global_scar_system_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw umin i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_umax_ret_i32( +; HSAIL: atomic_max_global_scar_system_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void 
@global_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw umax i32 addrspace(1)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_umax_ret_i32_offset( +; HSAIL: atomic_max_global_scar_system_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw umax i32 addrspace(1)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xchg_noret_i32( +; HSAIL: atomic_exch_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @global_atomic_xchg_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xchg_noret_i32_offset( +; HSAIL: atomic_exch_global_scar_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_xchg_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw xchg i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_add_noret_i32( +; HSAIL: atomicnoret_add_global_scar_system_s32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_add_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_add_noret_i32_offset( +; HSAIL: atomicnoret_add_global_scar_system_s32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_add_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_inc_noret_i32( +define void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(1)* %ptr, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_inc_noret_i32_offset( +define void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(1)* %gep, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_inc_noret_i32_bad_si_offset( +define void @global_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(1)* %ptr, i32 %a, i32 %b) nounwind { + %sub = sub i32 %a, %b + %add = add i32 %sub, 4 + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %add + %result = atomicrmw add i32 addrspace(1)* %gep, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_sub_noret_i32( +; HSAIL: atomicnoret_sub_global_scar_system_s32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_sub_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_sub_noret_i32_offset( +; HSAIL: atomicnoret_sub_global_scar_system_s32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_sub_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(1)* %gep, i32 4 seq_cst + ret 
void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_dec_noret_i32( +define void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(1)* %ptr, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_dec_noret_i32_offset( +define void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(1)* %gep, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_and_noret_i32( +; HSAIL: atomicnoret_and_global_scar_system_b32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_and_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw and i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_and_noret_i32_offset( +; HSAIL: atomicnoret_and_global_scar_system_b32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_and_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw and i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_or_noret_i32( +; HSAIL: atomicnoret_or_global_scar_system_b32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_or_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw or i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_or_noret_i32_offset( +; HSAIL: atomicnoret_or_global_scar_system_b32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_or_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw or i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xor_noret_i32( +; HSAIL: atomicnoret_xor_global_scar_system_b32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_xor_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw xor i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_xor_noret_i32_offset( +; HSAIL: atomicnoret_xor_global_scar_system_b32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_xor_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw xor i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; FIXME: There is no atomic nand instr +; XHSAIL-LABEL: {{^}}prog function &global_atomic_nand_noret_i32(uction, so we somehow need to expand this. 
+; define void @global_atomic_nand_noret_i32(i32 addrspace(1)* %ptr) nounwind { +; %result = atomicrmw nand i32 addrspace(1)* %ptr, i32 4 seq_cst +; ret void +; } + +; HSAIL-LABEL: {{^}}prog function &global_atomic_min_noret_i32( +; HSAIL: atomicnoret_min_global_scar_system_s32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_min_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw min i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_min_noret_i32_offset( +; HSAIL: atomicnoret_min_global_scar_system_s32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_min_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw min i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_max_noret_i32( +; HSAIL: atomicnoret_max_global_scar_system_s32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_max_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw max i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_max_noret_i32_offset( +; HSAIL: atomicnoret_max_global_scar_system_s32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_max_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw max i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_umin_noret_i32( +; HSAIL: atomicnoret_min_global_scar_system_u32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_umin_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw umin i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_umin_noret_i32_offset( +; HSAIL: atomicnoret_min_global_scar_system_u32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_umin_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw umin i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_umax_noret_i32( +; HSAIL: atomicnoret_max_global_scar_system_u32 [{{\$s[0-9]+}}], 4; +define void @global_atomic_umax_noret_i32(i32 addrspace(1)* %ptr) nounwind { + %result = atomicrmw umax i32 addrspace(1)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_umax_noret_i32_offset( +; HSAIL: atomicnoret_max_global_scar_system_u32 [{{\$s[0-9]+}}+16], 4; +define void @global_atomic_umax_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 + %result = atomicrmw umax i32 addrspace(1)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_load_acquire_ret_i32( +; HSAIL: atomic_ld_global_scacq_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @global_atomic_load_acquire_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(1)* %ptr acquire, align 4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_load_seq_cst_ret_i32( +; HSAIL: memfence_screl_system; +; HSAIL: atomic_ld_global_scacq_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @global_atomic_load_seq_cst_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 
4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_load_monotonic_ret_i32( +; HSAIL: atomic_ld_global_rlx_system_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @global_atomic_load_monotonic_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(1)* %ptr monotonic, align 4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_store_release_i32( +; HSAIL: atomicnoret_st_global_screl_system_b32 [{{\$s[0-9]+}}], 9; +define void @global_atomic_store_release_i32(i32 addrspace(1)* %ptr) nounwind { + store atomic i32 9, i32 addrspace(1)* %ptr release, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_store_seq_cst_i32( +; HSAIL: atomicnoret_st_global_screl_system_b32 [{{\$s[0-9]+}}], 9; +; HSAIL: memfence_scacq_system; +define void @global_atomic_store_seq_cst_i32(i32 addrspace(1)* %ptr) nounwind { + store atomic i32 9, i32 addrspace(1)* %ptr seq_cst, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_atomic_store_monotonic_i32( +; HSAIL: atomicnoret_st_global_rlx_system_b32 [{{\$s[0-9]+}}], {{\$s[0-9]+}}; +define void @global_atomic_store_monotonic_i32(i32 addrspace(1)* %ptr, i32 %foo) nounwind { + store atomic i32 %foo, i32 addrspace(1)* %ptr monotonic, align 4 + ret void +} Index: test/CodeGen/HSAIL/global-i1.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/global-i1.ll @@ -0,0 +1,47 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; HSAIL: prog readonly_u8 &gval_init_true = 1; +; HSAIL; prog readonly_u8 &gval_init_false = 0; +; HSAIL: prog readonly_u8 &gval_undef; +; HSAIL: prog align(4) readonly_u8 &gval_arr_zeroinit[1024] = u8[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL: prog align(4) readonly_u8 &gval_arr10_init[10] = u8[](1, 0, 1, 0, 1, 0, 1, 0, 1, 0); +; HSAIL: prog align(4) readonly_u8 &gval_arr3_init[3] = u8[](1, 0, 1); +; HSAIL: prog align(4) readonly_u8 &gval_arr8_init[8] = u8[](1, 0, 1, 1, 0, 1, 1, 1); + +@gval_init_true = addrspace(2) global i1 true +@gval_init_false = addrspace(2) global i1 false +@gval_undef = addrspace(2) global i1 undef + +@gval_arr_zeroinit = addrspace(2) global [1024 x i1] zeroinitializer +@gval_arr10_init = addrspace(2) global [10 x i1] [i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false] +@gval_arr3_init = addrspace(2) global [3 x i1] [i1 true, i1 false, i1 true] +@gval_arr8_init = addrspace(2) global [8 x i1] [i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true] + +@gval = addrspace(3) global i1 undef +@gval_arr = addrspace(3) global [1024 x i1] undef + +; FIXME: i1 vector initializers don't work. + + +; FIXME: Why isn't this declared as i8? +; HSAIL-LABEL: {{^}}prog function &load_i1_global +; HSAIL: group_u8 %gval; +; HSAIL: ld_group_u8 {{\$s[0-9]+}}, [%gval]; +define void @load_i1_global(i32 addrspace(1)* %out) { + %ld = load i1, i1 addrspace(3)* @gval + %val = zext i1 %ld to i32 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FIXME: Why isn't this declared as b8? 
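+; (Illustration for the i1 vector initializer FIXME above: an initializer of the shape
+; below is the case that currently fails. The global is hypothetical and kept
+; commented out.)
+; @gval_v4i1_init = addrspace(2) global <4 x i1> <i1 true, i1 false, i1 true, i1 false>
+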
+; HSAIL-LABEL: {{^}}prog function &load_i1_arr_global +; HSAIL: align(4) group_u8 %gval_arr[1024]; +; HSAIL: ld_group_u8 {{\$s[0-9]+}}, [%gval_arr][7]; +define void @load_i1_arr_global(i32 addrspace(1)* %out) { + %gep = getelementptr [1024 x i1], [1024 x i1] addrspace(3)* @gval_arr, i32 0, i32 7 + %ld = load i1, i1 addrspace(3)* %gep + %val = zext i1 %ld to i32 + store i32 %val, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/global-variable-alias.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/global-variable-alias.ll @@ -0,0 +1,12 @@ +; XFAIL: * +; RUN: llc -march=hsail < %s + +@0 = addrspace(2) global [4 x i32] [ i32 5, i32 4, i32 432, i32 3 ] + +@alias = alias [4 x i32] addrspace(2)* @0 + +define i32 @use_alias_gv() nounwind { + %gep = getelementptr [4 x i32], [4 x i32] addrspace(2)* @alias, i32 0, i32 1 + %load = load i32, i32 addrspace(2)* %gep + ret i32 %load +} Index: test/CodeGen/HSAIL/group-atomics.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/group-atomics.ll @@ -0,0 +1,484 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &group_atomic_xchg_ret_i32( +; HSAIL: atomic_exch_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_xchg_monotonic_ret_i32( +; HSAIL: atomic_exch_group_rlx_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_xchg_monotonic_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 monotonic + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_xchg_acquire_ret_i32( +; HSAIL: atomic_exch_group_scacq_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_xchg_acquire_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 acquire + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_xchg_release_ret_i32( +; HSAIL: atomic_exch_group_screl_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_xchg_release_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 release + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_xchg_acq_rel_ret_i32( +; HSAIL: atomic_exch_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_xchg_acq_rel_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 acq_rel + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_xchg_ret_i32_offset( +; HSAIL: atomic_exch_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, 
i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_add_ret_i32( +; HSAIL: atomic_add_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_add_ret_i32_offset( +; HSAIL: atomic_add_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_inc_ret_i32( +; HSAIL: atomic_add_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 1; +define void @group_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_inc_ret_i32_offset( +; HSAIL: atomic_add_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 1; +define void @group_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_sub_ret_i32( +; HSAIL: atomic_sub_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_sub_ret_i32_offset( +; HSAIL: atomic_sub_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_dec_ret_i32( +; HSAIL: atomic_sub_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 1; +define void @group_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_dec_ret_i32_offset( +; HSAIL: atomic_sub_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 1; +define void @group_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_and_ret_i32( +; HSAIL: atomic_and_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw and i32 
addrspace(3)* %ptr, i32 4 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_and_ret_i32_offset(
+; HSAIL: atomic_and_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4;
+define void @group_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_or_ret_i32(
+; HSAIL: atomic_or_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4;
+define void @group_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_or_ret_i32_offset(
+; HSAIL: atomic_or_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4;
+define void @group_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_xor_ret_i32(
+; HSAIL: atomic_xor_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4;
+define void @group_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_xor_ret_i32_offset(
+; HSAIL: atomic_xor_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4;
+define void @group_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
+; XHSAIL-LABEL: {{^}}prog function &group_atomic_nand_ret_i32(
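+;
+; A possible expansion (sketch only, not something the backend does today): lower the
+; nand to a compare-and-swap loop, roughly the IR a cmpxchg-based expansion (e.g. via
+; AtomicExpandPass) would produce for the disabled test below. The function name and
+; block labels here are hypothetical:
+;
+; define void @group_atomic_nand_ret_i32_expanded(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
+; entry:
+;   %init = load i32, i32 addrspace(3)* %ptr, align 4
+;   br label %loop
+; loop:
+;   %loaded = phi i32 [ %init, %entry ], [ %old, %loop ]
+;   %and = and i32 %loaded, 4
+;   %nand = xor i32 %and, -1
+;   %pair = cmpxchg i32 addrspace(3)* %ptr, i32 %loaded, i32 %nand seq_cst seq_cst
+;   %old = extractvalue { i32, i1 } %pair, 0
+;   %success = extractvalue { i32, i1 } %pair, 1
+;   br i1 %success, label %done, label %loop
+; done:
+;   store i32 %old, i32 addrspace(1)* %out, align 4
+;   ret void
+; }
+;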
+; define void @group_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { +; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst +; store i32 %result, i32 addrspace(1)* %out, align 4 +; ret void +; } + +; HSAIL-LABEL: {{^}}prog function &group_atomic_min_ret_i32( +; HSAIL: atomic_min_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_min_ret_i32_offset( +; HSAIL: atomic_min_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_max_ret_i32( +; HSAIL: atomic_max_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_max_ret_i32_offset( +; HSAIL: atomic_max_group_scar_wg_s32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_umin_ret_i32( +; HSAIL: atomic_min_group_scar_wg_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_umin_ret_i32_offset( +; HSAIL: atomic_min_group_scar_wg_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_umax_ret_i32( +; HSAIL: atomic_max_group_scar_wg_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_umax_ret_i32_offset( +; HSAIL: atomic_max_group_scar_wg_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_xchg_noret_i32( 
+; HSAIL: atomic_exch_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}], 4; +define void @group_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_xchg_noret_i32_offset( +; HSAIL: atomic_exch_group_scar_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_add_noret_i32( +; HSAIL: atomicnoret_add_group_scar_wg_s32 [{{\$s[0-9]+}}], 4; +define void @group_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_add_noret_i32_offset( +; HSAIL: atomicnoret_add_group_scar_wg_s32 [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_inc_noret_i32( +define void @group_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_inc_noret_i32_offset( +define void @group_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_inc_noret_i32_bad_si_offset( +define void @group_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind { + %sub = sub i32 %a, %b + %add = add i32 %sub, 4 + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 %add + %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_sub_noret_i32( +; HSAIL: atomicnoret_sub_group_scar_wg_s32 [{{\$s[0-9]+}}], 4; +define void @group_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_sub_noret_i32_offset( +; HSAIL: atomicnoret_sub_group_scar_wg_s32 [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_dec_noret_i32( +define void @group_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_dec_noret_i32_offset( +define void @group_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_and_noret_i32( +; HSAIL: atomicnoret_and_group_scar_wg_b32 [{{\$s[0-9]+}}], 4; +define void @group_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 
seq_cst
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_and_noret_i32_offset(
+; HSAIL: atomicnoret_and_group_scar_wg_b32 [{{\$s[0-9]+}}+16], 4;
+define void @group_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_or_noret_i32(
+; HSAIL: atomicnoret_or_group_scar_wg_b32 [{{\$s[0-9]+}}], 4;
+define void @group_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_or_noret_i32_offset(
+; HSAIL: atomicnoret_or_group_scar_wg_b32 [{{\$s[0-9]+}}+16], 4;
+define void @group_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_xor_noret_i32(
+; HSAIL: atomicnoret_xor_group_scar_wg_b32 [{{\$s[0-9]+}}], 4;
+define void @group_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+ %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
+ ret void
+}
+
+; HSAIL-LABEL: {{^}}prog function &group_atomic_xor_noret_i32_offset(
+; HSAIL: atomicnoret_xor_group_scar_wg_b32 [{{\$s[0-9]+}}+16], 4;
+define void @group_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+ %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
+ %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
+ ret void
+}
+
+; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
+; XHSAIL-LABEL: {{^}}prog function &group_atomic_nand_noret_i32(
+; define void @group_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind { +; %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst +; ret void +; } + +; HSAIL-LABEL: {{^}}prog function &group_atomic_min_noret_i32( +; HSAIL: atomicnoret_min_group_scar_wg_s32 [{{\$s[0-9]+}}], 4; +define void @group_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_min_noret_i32_offset( +; HSAIL: atomicnoret_min_group_scar_wg_s32 [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_max_noret_i32( +; HSAIL: atomicnoret_max_group_scar_wg_s32 [{{\$s[0-9]+}}], 4; +define void @group_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_max_noret_i32_offset( +; HSAIL: atomicnoret_max_group_scar_wg_s32 [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_umin_noret_i32( +; HSAIL: atomicnoret_min_group_scar_wg_u32 [{{\$s[0-9]+}}], 4; +define void @group_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_umin_noret_i32_offset( +; HSAIL: atomicnoret_min_group_scar_wg_u32 [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_umax_noret_i32( +; HSAIL: atomicnoret_max_group_scar_wg_u32 [{{\$s[0-9]+}}], 4; +define void @group_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind { + %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_umax_noret_i32_offset( +; HSAIL: atomicnoret_max_group_scar_wg_u32 [{{\$s[0-9]+}}+16], 4; +define void @group_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { + %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 + %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_load_acquire_ret_i32( +; HSAIL: atomic_ld_group_scacq_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @group_atomic_load_acquire_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(3)* %ptr acquire, align 4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_load_seq_cst_ret_i32( +; HSAIL: memfence_screl_wg; +; HSAIL: atomic_ld_group_scacq_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @group_atomic_load_seq_cst_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(3)* %ptr seq_cst, align 4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; 
HSAIL-LABEL: {{^}}prog function &group_atomic_load_monotonic_ret_i32( +; HSAIL: atomic_ld_group_rlx_wg_b32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @group_atomic_load_monotonic_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind { + %result = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_store_release_i32( +; HSAIL: atomicnoret_st_group_screl_wg_b32 [{{\$s[0-9]+}}], 9; +define void @group_atomic_store_release_i32(i32 addrspace(3)* %ptr) nounwind { + store atomic i32 9, i32 addrspace(3)* %ptr release, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_store_seq_cst_i32( +; HSAIL: atomicnoret_st_group_screl_wg_b32 [{{\$s[0-9]+}}], 9; +; HSAIL: memfence_scacq_wg; +define void @group_atomic_store_seq_cst_i32(i32 addrspace(3)* %ptr) nounwind { + store atomic i32 9, i32 addrspace(3)* %ptr seq_cst, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &group_atomic_store_monotonic_i32( +; HSAIL: atomicnoret_st_group_rlx_wg_b32 [{{\$s[0-9]+}}], {{\$s[0-9]+}}; +define void @group_atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %foo) nounwind { + store atomic i32 %foo, i32 addrspace(3)* %ptr monotonic, align 4 + ret void +} Index: test/CodeGen/HSAIL/gv-const-addrspace.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/gv-const-addrspace.ll @@ -0,0 +1,274 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +%struct.foo = type { float, [5 x i32] } + + +; HSAIL-DAG: prog align(16) readonly_u32 &vector_v4i32[4] = u32[](47, 256, 99, 1299); +; HSAIL-DAG: prog align(16) readonly_u32 &vector_v3i32[4] = u32[](47, 256, 12, 0); +; HSAIL-DAG: prog align(16) readonly_u32 &zero_vector_v4i32[4] = u32[](0, 0, 0, 0); +; HSAIL-DAG: prog align(16) readonly_u32 &zero_vector_v3i32[4] = u32[](0, 0, 0, 0); + +; HSAIL-DAG: prog readonly_u32 &array_array[16] = u32[](1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4); +; HSAIL-DAG: prog readonly_u32 &array_array_array[32] = u32[](1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4); +; HSAIL-DAG: prog align(8) readonly_u32 &array_array_vector[16] = u32[](1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); +; HSAIL-DAG: prog align(8) readonly_u8 &array_array_struct[192] = u8[](0, 0, 0, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + +; HSAIL-DAG: readonly_f32 &float_gv[5] = f32[](0F00000000, 0F3f800000, 0F40000000, 0F40400000, 0F40800000); +; HSAIL-DAG: readonly_f64 &double_gv[5] = f64[](0D0000000000000000, 0D3ff0000000000000, 0D4000000000000000, 0D4008000000000000, 0D4010000000000000); +; HSAIL-DAG: readonly_u32 &i32_gv[5] = u32[](0, 1, 2, 3, 4); +; HSAIL-DAG: align(8) readonly_u8 &struct_foo_gv[24] = u8[](0, 0, 128, 65, 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0); +; HSAIL-DAG: readonly_u32 
&array_v1_gv[4] = u32[](1, 2, 3, 4); +; HSAIL-DAG: align(8) readonly_u8 &struct_foo_zeroinit[240] = u8[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL-DAG: align(8) readonly_u8 &bare_struct_foo_zeroinit[24] = u8[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + +;HSAIL-DAG: align(8) readonly_u8 &struct_foo_partial_zeroinit[48] = u8[](0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + +; HSAIL-DAG: prog align(8) readonly_u8 &struct_foo_undefinit[240]; +; HSAIL-DAG: prog align(8) readonly_u8 &bare_struct_foo_undefinit[24]; +; HSAIL-DAG: prog align(8) readonly_u8 &struct_foo_partial_undefinit[48] = u8[](0, 0, 128, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + +; HSAIL-DAG: readonly_u32 &zeroinit_scalar_array[1025] = u32[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL-DAG: align(16) readonly_u32 &zeroinit_vector_array[16] = u32[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + +; HSAIL-DAG: prog readonly_u32 &undefinit_scalar_array[1025]; +; HSAIL-DAG: prog align(16) readonly_u32 &undefinit_vector_array[16]; + +@scalar_i32 = addrspace(2) constant i32 99 +@vector_v4i32 = addrspace(2) constant <4 x i32> +@vector_v3i32 = addrspace(2) constant <3 x i32> + +@zero_vector_v4i32 = addrspace(2) constant <4 x i32> zeroinitializer +@zero_vector_v3i32 = addrspace(2) constant <3 x i32> zeroinitializer + +@array_array = addrspace(2) constant [4 x [4 x i32]] [ + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ] +] + +@array_array_array = addrspace(2) constant [2 x [4 x [4 x i32]]] [ + [4 x [4 x i32]] [ + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ] + ], + [4 x [4 x i32]] [ + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ], + [4 x i32] [i32 1, i32 2, i32 3, i32 4 ] + ] +] + +@array_array_vector = addrspace(2) constant [2 x [4 x <2 x i32>]] [ + [4 x <2 x i32>] [ + <2 x i32> , + <2 x i32> , + <2 x i32> , + <2 x i32> + ], + [4 x <2 x i32>] [ + <2 x i32> , + <2 x i32> , + <2 x i32> , + <2 x i32> + ] +] + +@array_array_struct = addrspace(2) constant [2 x [4 x %struct.foo]] [ + [4 x %struct.foo] [ + %struct.foo { float 0.5, [5 x i32] zeroinitializer }, + %struct.foo { float 1.0, [5 x i32] zeroinitializer }, + %struct.foo { float 2.0, [5 x i32] zeroinitializer }, + %struct.foo { float 4.0, [5 x i32] zeroinitializer } + ], + [4 x %struct.foo] [ + %struct.foo { float 8.0, [5 x i32] zeroinitializer }, + %struct.foo { float 16.0, [5 x i32] zeroinitializer }, + %struct.foo { float 32.0, [5 x i32] zeroinitializer }, + %struct.foo { float 64.0, [5 x i32] zeroinitializer } + ] +] + +@b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2 + +@float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4 +@double_gv = internal unnamed_addr addrspace(2) constant [5 x double] [double 0.0, double 1.0, double 2.0, double 3.0, double 4.0], align 4 + 
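+; Reference note on the encodings in the CHECK lines above: f32/f64 initializers are
+; emitted as raw IEEE-754 bit patterns (1.0f -> 0F3f800000, 4.0f -> 0F40800000,
+; 1.0 -> 0D3ff0000000000000), and struct initializers are flattened to little-endian
+; u8 arrays, e.g. float 16.0 = 0x41800000 -> bytes 0, 0, 128, 65 at the start of
+; &struct_foo_gv.
+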
+@struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ] + +@struct_foo_zeroinit = unnamed_addr addrspace(2) constant [10 x %struct.foo] zeroinitializer + +@bare_struct_foo_zeroinit = unnamed_addr addrspace(2) constant %struct.foo zeroinitializer + +@struct_foo_partial_zeroinit = unnamed_addr addrspace(2) constant [2 x %struct.foo] [ + %struct.foo { float 1.0, [5 x i32] zeroinitializer }, + %struct.foo { float 2.0, [5 x i32] zeroinitializer } +] + +@struct_foo_undefinit = unnamed_addr addrspace(2) constant [10 x %struct.foo] undef + +@bare_struct_foo_undefinit = unnamed_addr addrspace(2) constant %struct.foo undef + +@struct_foo_partial_undefinit = unnamed_addr addrspace(2) constant [2 x %struct.foo] [ + %struct.foo { float 1.0, [5 x i32] undef }, + %struct.foo { float 2.0, [5 x i32] undef } +] + +@zeroinit_scalar_array = internal unnamed_addr addrspace(2) constant [1025 x i32] zeroinitializer +@zeroinit_vector_array = internal addrspace(2) constant [4 x <4 x i32>] zeroinitializer + +@undefinit_scalar_array = unnamed_addr addrspace(2) constant [1025 x i32] undef +@undefinit_vector_array = addrspace(2) constant [4 x <4 x i32>] undef + +@array_v1_gv = internal addrspace(2) constant [4 x <1 x i32>] [ <1 x i32> , + <1 x i32> , + <1 x i32> , + <1 x i32> ] + +; FUNC-LABEL: {{^}}prog function &float( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_f32 [[LD:\$s[0-9]+]], [&float_gv]{{\[}}[[ADDR]]{{\]}}; +; HSAIL: st_global_align(4)_f32 [[LD]] +; HSAIL: ret; +define void @float(float addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index + %tmp1 = load float, float addrspace(2)* %tmp0 + store float %tmp1, float addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}prog function &double( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 3; +; HSAIL: ld_readonly_align(8)_f64 [[LD:\$d[0-9]+]], [&double_gv]{{\[}}[[ADDR]]{{\]}}; +; HSAIL: st_global_align(8)_f64 [[LD]] +; HSAIL: ret; +define void @double(double addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [5 x double], [5 x double] addrspace(2)* @double_gv, i32 0, i32 %index + %tmp1 = load double, double addrspace(2)* %tmp0 + store double %tmp1, double addrspace(1)* %out + ret void +} + +@i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4 + +; FUNC-LABEL: {{^}}prog function &i32( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_u32 [[LD:\$s[0-9]+]], [&i32_gv]{{\[}}[[ADDR]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @i32(i32 addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index + %tmp1 = load i32, i32 addrspace(2)* %tmp0 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &struct_foo_gv_load( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_u32 [[LD:\$s[0-9]+]], [&struct_foo_gv]{{\[}}[[ADDR]]+4{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i32, i32 addrspace(2)* %gep, align 4 + store i32 %load, i32 
addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &struct_foo_gv_zeroinit_load( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_u32 [[LD:\$s[0-9]+]], [&struct_foo_zeroinit]{{\[}}[[ADDR]]+4{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @struct_foo_gv_zeroinit_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [10 x %struct.foo], [10 x %struct.foo] addrspace(2)* @struct_foo_zeroinit, i32 0, i32 0, i32 1, i32 %index + %load = load volatile i32, i32 addrspace(2)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &bare_struct_foo_gv_zeroinit_load( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_u32 [[LD:\$s[0-9]+]], [&bare_struct_foo_zeroinit]{{\[}}[[ADDR]]+4{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @bare_struct_foo_gv_zeroinit_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds %struct.foo, %struct.foo addrspace(2)* @bare_struct_foo_zeroinit, i32 0, i32 1, i32 %index + %load = load volatile i32, i32 addrspace(2)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &struct_foo_gv_partial_zeroinit_load( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_u32 [[LD:\$s[0-9]+]], [&struct_foo_partial_zeroinit]{{\[}}[[ADDR]]+4{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @struct_foo_gv_partial_zeroinit_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [2 x %struct.foo], [2 x %struct.foo] addrspace(2)* @struct_foo_partial_zeroinit, i32 0, i32 0, i32 1, i32 %index + %load = load volatile i32, i32 addrspace(2)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &array_v1_gv_load( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_u32 [[LD:\$s[0-9]+]], [&array_v1_gv]{{\[}}[[ADDR]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index + %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4 + store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4 + ret void +} + + +; FUNC-LABEL: {{^}}prog function &zeroinit_scalar_array_load( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_u32 [[LD:\$s[0-9]+]], [&zeroinit_scalar_array]{{\[}}[[ADDR]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @zeroinit_scalar_array_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [1025 x i32], [1025 x i32] addrspace(2)* @zeroinit_scalar_array, i32 0, i32 %index + %load = load i32, i32 addrspace(2)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &zeroinit_vector_array_load( +define void @zeroinit_vector_array_load(<4 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>] addrspace(2)* @zeroinit_vector_array, i32 0, i32 %index + %load = load <4 x i32>, <4 x i32> addrspace(2)* %gep, align 16 + store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +; FUNC-LABEL: {{^}}prog function 
&gv_addressing_in_branch( +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_readonly_align(4)_f32 [[LD:\$s[0-9]+]], [&float_gv]{{\[}}[[ADDR]]{{\]}}; +; HSAIL: st_global_align(4)_f32 [[LD]] +; HSAIL: ret; +define void @gv_addressing_in_branch(float addrspace(1)* %out, i32 %index, i32 %a) { +entry: + %tmp0 = icmp eq i32 0, %a + br i1 %tmp0, label %if, label %else + +if: + %tmp1 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index + %tmp2 = load float, float addrspace(2)* %tmp1 + store float %tmp2, float addrspace(1)* %out + br label %endif + +else: + store float 1.0, float addrspace(1)* %out + br label %endif + +endif: + ret void +} Index: test/CodeGen/HSAIL/gv-global-addrspace.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/gv-global-addrspace.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL: prog alloc(agent) global_u32 &global_i32_undef; +@global_i32_undef = addrspace(1) global i32 undef + +; HSAIL: prog alloc(agent) global_u32 &global_i32_zeroinit = 0; + +@global_i32_zeroinit = addrspace(1) global i32 zeroinitializer + +; HSAIL: prog readonly_u32 &readonly_i32_zeroinit = 0; + +@readonly_i32_zeroinit = addrspace(2) global i32 zeroinitializer Index: test/CodeGen/HSAIL/gv-group-addrspace.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/gv-group-addrspace.ll @@ -0,0 +1,158 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +%struct.foo = type { float, [5 x i32] } + + +@scalar_i32 = addrspace(3) global i32 undef +@scalar_f32 = addrspace(3) global float undef +@scalar_i1 = addrspace(3) global i1 undef + +@vector_v4i32 = addrspace(3) global <4 x i32> undef +@vector_v3i32 = addrspace(3) global <3 x i32> undef + +@array_i32 = addrspace(3) global [7 x i32] undef, align 4 + +@float_array_gv = addrspace(3) global [5 x float] undef +@double_array_gv = addrspace(3) global [5 x double] undef + +@struct_foo_gv = addrspace(3) global %struct.foo undef +@array_struct_foo_gv = addrspace(3) global [10 x %struct.foo] undef + +@array_v1_gv = addrspace(3) global [4 x <1 x i32>] undef + + +; HSAIL-LABEL: {{^}}prog function &test_scalar_i32()( +; HSAIL: { +; HSAIL-NEXT: group_u32 %scalar_i32; +; HSAIL: ld_group_align(4)_u32 {{\$s[0-9]+}}, [%scalar_i32]; +; HSAIL: ret; +define void @test_scalar_i32(i32 addrspace(1)* %out) { + %tmp1 = load i32, i32 addrspace(3)* @scalar_i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_scalar_f32()( +; HSAIL: { +; HSAIL-NEXT: group_f32 %scalar_f32; +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]+}}, [%scalar_f32]; +; HSAIL: ret; +define void @test_scalar_f32(float addrspace(1)* %out) { + %tmp1 = load float, float addrspace(3)* @scalar_f32 + store float %tmp1, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_vector_v4i32()( +; HSAIL: { +; HSAIL-NEXT: group_u32 %vector_v4i32[4]; +; HSAIL-DAG: ld_group_align(16)_u32 {{\$s[0-9]+}}, [%vector_v4i32]; +; HSAIL-DAG: ld_group_align(4)_u32 {{\$s[0-9]+}}, [%vector_v4i32][4]; +; HSAIL-DAG: ld_group_align(8)_u32 {{\$s[0-9]+}}, [%vector_v4i32][8]; +; HSAIL-DAG: ld_group_align(4)_u32 {{\$s[0-9]+}}, [%vector_v4i32][12]; +; HSAIL: ret; +define void @test_vector_v4i32(<4 x i32> addrspace(1)* %out) { + %tmp1 = load <4 x i32>, <4 x i32> addrspace(3)* @vector_v4i32 + store <4 x i32> %tmp1, <4 
x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_vector_v3i32()( +; HSAIL: { +; HSAIL-NEXT: group_u32 %vector_v3i32[4]; +; HSAIL-DAG: ld_group_align(16)_u64 {{\$d[0-9]+}}, [%vector_v3i32]; +; HSAIL-DAG: ld_group_align(8)_u32 {{\$s[0-9]+}}, [%vector_v3i32][8]; +; HSAIL: ret; +define void @test_vector_v3i32(<3 x i32> addrspace(1)* %out) { + %tmp1 = load <3 x i32>, <3 x i32> addrspace(3)* @vector_v3i32 + store <3 x i32> %tmp1, <3 x i32> addrspace(1)* %out + ret void +} + +; FIXME: Align of load and declaration should match +; HSAIL-LABEL: {{^}}prog function &test_scalar_i1()( +; HSAIL: { +; HSAIL-NEXT: group_u8 %scalar_i1; +; HSAIL: ld_group_u8 {{\$s[0-9]+}}, [%scalar_i1]; +; HSAIL: ret; +define void @test_scalar_i1(i1 addrspace(1)* %out) { + %tmp1 = load i1, i1 addrspace(3)* @scalar_i1 + store i1 %tmp1, i1 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_array_i32()( +; HSAIL: { +; HSAIL-NEXT: group_u32 %array_i32[7]; +; HSAIL: ld_group_align(4)_u32 {{\$s[0-9]+}}, [%array_i32][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @test_array_i32(i32 addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [7 x i32], [7 x i32] addrspace(3)* @array_i32, i32 0, i32 %index + %tmp1 = load i32, i32 addrspace(3)* %tmp0 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_float_array_gv()( +; HSAIL: { +; HSAIL-NEXT: group_f32 %float_array_gv[5]; +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]+}}, [%float_array_gv][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @test_float_array_gv(float addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [5 x float], [5 x float] addrspace(3)* @float_array_gv, i32 0, i32 %index + %tmp1 = load float, float addrspace(3)* %tmp0 + store float %tmp1, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_double_array_gv()( +; HSAIL: group_f64 %double_array_gv[5]; +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 3; +; HSAIL: ld_group_align(8)_f64 [[LD:\$d[0-9]+]], [%double_array_gv]{{\[}}[[ADDR]]{{\]}}; +; HSAIL: st_global_align(8)_f64 [[LD]] +; HSAIL: ret; +define void @test_double_array_gv(double addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [5 x double], [5 x double] addrspace(3)* @double_array_gv, i32 0, i32 %index + %tmp1 = load double, double addrspace(3)* %tmp0 + store double %tmp1, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &struct_foo_gv_load()( +; HSAIL: align(8) group_u8 %struct_foo_gv[24]; +; HSAIL: ld_group_align(8)_u32 [[LD:\$s[0-9]+]], [%struct_foo_gv][8]; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* @struct_foo_gv, i32 0, i32 1, i32 1 + %load = load i32, i32 addrspace(3)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &array_struct_foo_gv_load()( +; HSAIL: align(8) group_u8 %array_struct_foo_gv[240]; +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_group_align(4)_u32 [[LD:\$s[0-9]+]], [%array_struct_foo_gv]{{\[}}[[ADDR]]+4{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @array_struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [10 x %struct.foo], [10 x %struct.foo] addrspace(3)* @array_struct_foo_gv, i32 0, i32 0, i32 1, i32 %index + %load = load i32, i32 
addrspace(3)* %gep, align 4 + store i32 %load, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &array_v1_gv_load()( +; HSAIL: group_u32 %array_v1_gv[4]; + +; HSAIL: shl_u32 [[ADDR:\$s[0-9]+]], {{\$s[0-9]+}}, 2; +; HSAIL: ld_group_align(4)_u32 [[LD:\$s[0-9]+]], [%array_v1_gv]{{\[}}[[ADDR]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[LD]] +; HSAIL: ret; +define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) { + %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(3)* @array_v1_gv, i32 0, i32 %index + %load = load <1 x i32>, <1 x i32> addrspace(3)* %gep, align 4 + store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4 + ret void +} Index: test/CodeGen/HSAIL/gv-symbol-initializer.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/gv-symbol-initializer.ll @@ -0,0 +1,403 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL32 -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL64 -check-prefix=HSAIL %s + + +; HSAIL: readonly_u32 &int0 = 9; +; HSAIL: readonly_u32 &int1 = 34; +; HSAIL: readonly_u32 &int2 = 123; +; HSAIL: readonly_u32 &int3 = 222; +; HSAIL: readonly_u32 &int4 = 2424; + + +; HSAIL32: readonly_u32 &symbol_array[5] = u32[](0, 0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&symbol_array:0:4:&int0:0"; +; HSAIL32: pragma "initvarwithaddress:&symbol_array:4:4:&int1:0"; +; HSAIL32: pragma "initvarwithaddress:&symbol_array:8:4:&int2:0"; +; HSAIL32: pragma "initvarwithaddress:&symbol_array:12:4:&int3:0"; +; HSAIL32: pragma "initvarwithaddress:&symbol_array:16:4:&int4:0"; + +; HSAIL32: readonly_u32 &i32_array[10] = u32[](234, 456, 789, 235, 495, 1, 4, 9, 10, 453); + +; HSAIL32: readonly_u32 &constantexpr_address_array[5] = u32[](0, 0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&constantexpr_address_array:0:4:&i32_array:0"; +; HSAIL32: pragma "initvarwithaddress:&constantexpr_address_array:4:4:&i32_array:16"; +; HSAIL32: pragma "initvarwithaddress:&constantexpr_address_array:8:4:&i32_array:32"; +; HSAIL32: pragma "initvarwithaddress:&constantexpr_address_array:12:4:&i32_array:36"; +; HSAIL32: pragma "initvarwithaddress:&constantexpr_address_array:16:4:&i32_array:12"; + +; HSAIL32: align(8) readonly_u8 &struct_foo_gv[16] = u8[](0, 0, 128, 65, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&struct_foo_gv:4:1:&constantexpr_address_array:0"; +; HSAIL32: pragma "initvarwithaddress:&struct_foo_gv:12:1:&symbol_array:0"; + +; HSAIL32: align(8) readonly_u8 &struct_bar_gv[24] = u8[](7, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 0, 212, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&struct_bar_gv:4:1:&constantexpr_address_array:0"; +; HSAIL32: pragma "initvarwithaddress:&struct_bar_gv:16:1:&symbol_array:0"; + +; HSAIL32: align(8) readonly_u8 &struct_packed_bar_gv[12] = u8[](7, 0, 0, 0, 0, 23, 45, 0, 0, 0, 0, 212); +; HSAIL32: pragma "initvarwithaddress:&struct_packed_bar_gv:1:1:&constantexpr_address_array:0"; +; HSAIL32: pragma "initvarwithaddress:&struct_packed_bar_gv:7:1:&symbol_array:0"; + +; HSAIL32: align(8) readonly_u8 &struct_mixed_nullptr_sizes_0[16] = u8[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL32: align(8) readonly_u8 &struct_mixed_nullptr_sizes_1[16] = u8[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&struct_mixed_nullptr_sizes_1:4:1:&int1:0"; +; HSAIL32: 
pragma "initvarwithaddress:&struct_mixed_nullptr_sizes_1:8:1:%lds.int0:0"; + +; HSAIL32: readonly_u32 &lds_symbol_array[5] = u32[](0, 0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&lds_symbol_array:0:4:%lds.int0:0"; +; HSAIL32: pragma "initvarwithaddress:&lds_symbol_array:4:4:%lds.int1:0"; +; HSAIL32: pragma "initvarwithaddress:&lds_symbol_array:8:4:%lds.int2:0"; +; HSAIL32: pragma "initvarwithaddress:&lds_symbol_array:12:4:%lds.int3:0"; +; HSAIL32: pragma "initvarwithaddress:&lds_symbol_array:16:4:%lds.int4:0"; + + + +; HSAIL32: prog readonly_u32 &scalar_p2i32 = 0; +; HSAIL32: pragma "initvarwithaddress:&scalar_p2i32:0:4:&int0:0"; + +; HSAIL32: prog readonly_u32 &scalar_p3i32 = 0; +; HSAIL32: pragma "initvarwithaddress:&scalar_p3i32:0:4:%lds.int0:0"; + +; HSAIL32: prog align(16) readonly_u32 &zero_vector_v4p2i32[4] = u32[](0, 0, 0, 0); +; HSAIL32: prog align(16) readonly_u32 &zero_vector_v3p2i32[4] = u32[](0, 0, 0, 0); + +; HSAIL32: prog readonly_u32 &p2i32_array[4] = u32[](0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&p2i32_array:0:4:&int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array:4:4:&int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array:8:4:&int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array:12:4:&int3:0"; + +; HSAIL32: prog readonly_u32 &p3i32_array[4] = u32[](0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&p3i32_array:0:4:%lds.int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array:4:4:%lds.int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array:8:4:%lds.int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array:12:4:%lds.int3:0"; + +; HSAIL32: prog readonly_u32 &p2i32_array_array[16] = u32[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:0:4:&int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:4:4:&int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:8:4:&int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:12:4:&int3:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:16:4:&int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:20:4:&int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:24:4:&int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:28:4:&int3:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:32:4:&int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:36:4:&int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:40:4:&int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:44:4:&int3:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:48:4:&int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:52:4:&int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:56:4:&int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p2i32_array_array:60:4:&int3:0"; + +; HSAIL32: prog readonly_u32 &p3i32_array_array[16] = u32[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:0:4:%lds.int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:4:4:%lds.int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:8:4:%lds.int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:12:4:%lds.int3:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:16:4:%lds.int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:20:4:%lds.int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:24:4:%lds.int2:0"; +; 
HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:28:4:%lds.int3:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:32:4:%lds.int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:36:4:%lds.int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:40:4:%lds.int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:44:4:%lds.int3:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:48:4:%lds.int0:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:52:4:%lds.int1:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:56:4:%lds.int2:0"; +; HSAIL32: pragma "initvarwithaddress:&p3i32_array_array:60:4:%lds.int3:0"; + +; HSAIL32: prog readonly_u32 &ptr_to_int_gv_p3i32 = 0; +; HSAIL32: pragma "initvarwithaddress:&ptr_to_int_gv_p3i32:0:4:%lds.int0:0"; + +; HSAIL32: prog readonly_u32 &ptr_to_int_array_gv_p3i32[4] = u32[](0, 0, 0, 0); +; HSAIL32: pragma "initvarwithaddress:&ptr_to_int_array_gv_p3i32:0:4:%lds.int0:0"; +; HSAIL32: pragma "initvarwithaddress:&ptr_to_int_array_gv_p3i32:4:4:%lds.int1:0"; +; HSAIL32: pragma "initvarwithaddress:&ptr_to_int_array_gv_p3i32:8:4:%lds.int2:0"; +; HSAIL32: pragma "initvarwithaddress:&ptr_to_int_array_gv_p3i32:12:4:%lds.int3:0"; + + +; HSAIL64: readonly_u64 &symbol_array[5] = u64[](0, 0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&symbol_array:0:8:&int0:0"; +; HSAIL64: pragma "initvarwithaddress:&symbol_array:8:8:&int1:0"; +; HSAIL64: pragma "initvarwithaddress:&symbol_array:16:8:&int2:0"; +; HSAIL64: pragma "initvarwithaddress:&symbol_array:24:8:&int3:0"; +; HSAIL64: pragma "initvarwithaddress:&symbol_array:32:8:&int4:0"; + +; HSAIL64: readonly_u32 &i32_array[10] = u32[](234, 456, 789, 235, 495, 1, 4, 9, 10, 453); +; HSAIL64: readonly_u64 &constantexpr_address_array[5] = u64[](0, 0, 0, 0, 0); + +; HSAIL64: pragma "initvarwithaddress:&constantexpr_address_array:0:8:&i32_array:0"; +; HSAIL64: pragma "initvarwithaddress:&constantexpr_address_array:8:8:&i32_array:16"; +; HSAIL64: pragma "initvarwithaddress:&constantexpr_address_array:16:8:&i32_array:32"; +; HSAIL64: pragma "initvarwithaddress:&constantexpr_address_array:24:8:&i32_array:36"; +; HSAIL64: pragma "initvarwithaddress:&constantexpr_address_array:32:8:&i32_array:12"; + +; HSAIL64: align(8) readonly_u8 &struct_foo_gv[32] = u8[](0, 0, 128, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&struct_foo_gv:8:1:&constantexpr_address_array:0"; +; HSAIL64: pragma "initvarwithaddress:&struct_foo_gv:24:1:&symbol_array:0"; + +; HSAIL64: align(8) readonly_u8 &struct_bar_gv[48] = u8[](7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212, 0, 0, 0, 0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&struct_bar_gv:8:1:&constantexpr_address_array:0"; +; HSAIL64: pragma "initvarwithaddress:&struct_bar_gv:32:1:&symbol_array:0"; + +; HSAIL64: align(8) readonly_u8 &struct_packed_bar_gv[20] = u8[](7, 0, 0, 0, 0, 0, 0, 0, 0, 23, 45, 0, 0, 0, 0, 0, 0, 0, 0, 212); +; HSAIL64: pragma "initvarwithaddress:&struct_packed_bar_gv:1:1:&constantexpr_address_array:0"; +; HSAIL64: pragma "initvarwithaddress:&struct_packed_bar_gv:11:1:&symbol_array:0"; + +; HSAIL64: align(8) readonly_u8 &struct_mixed_nullptr_sizes_0[32] = u8[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL64: align(8) readonly_u8 &struct_mixed_nullptr_sizes_1[32] = u8[](0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&struct_mixed_nullptr_sizes_1:8:1:&int1:0"; +; HSAIL64: pragma "initvarwithaddress:&struct_mixed_nullptr_sizes_1:16:1:%lds.int0:0"; + +; HSAIL64: readonly_u32 &lds_symbol_array[5] = u32[](0, 0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&lds_symbol_array:0:4:%lds.int0:0"; +; HSAIL64: pragma "initvarwithaddress:&lds_symbol_array:4:4:%lds.int1:0"; +; HSAIL64: pragma "initvarwithaddress:&lds_symbol_array:8:4:%lds.int2:0"; +; HSAIL64: pragma "initvarwithaddress:&lds_symbol_array:12:4:%lds.int3:0"; +; HSAIL64: pragma "initvarwithaddress:&lds_symbol_array:16:4:%lds.int4:0"; + + +; HSAIL64: prog readonly_u64 &scalar_p2i32 = 0; +; HSAIL64: pragma "initvarwithaddress:&scalar_p2i32:0:8:&int0:0"; + +; HSAIL64: prog readonly_u32 &scalar_p3i32 = 0; +; HSAIL64: pragma "initvarwithaddress:&scalar_p3i32:0:4:%lds.int0:0"; + +; HSAIL64: prog align(32) readonly_u64 &zero_vector_v4p2i32[4] = u64[](0, 0, 0, 0); +; HSAIL64: prog align(16) readonly_u32 &zero_vector_v3p2i32[4] = u32[](0, 0, 0, 0); + +; HSAIL64: prog readonly_u64 &p2i32_array[4] = u64[](0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&p2i32_array:0:8:&int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array:8:8:&int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array:16:8:&int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array:24:8:&int3:0"; + +; HSAIL64: prog readonly_u32 &p3i32_array[4] = u32[](0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&p3i32_array:0:4:%lds.int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array:4:4:%lds.int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array:8:4:%lds.int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array:12:4:%lds.int3:0"; + +; HSAIL64: prog readonly_u64 &p2i32_array_array[16] = u64[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:0:8:&int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:8:8:&int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:16:8:&int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:24:8:&int3:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:32:8:&int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:40:8:&int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:48:8:&int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:56:8:&int3:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:64:8:&int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:72:8:&int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:80:8:&int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:88:8:&int3:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:96:8:&int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:104:8:&int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:112:8:&int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p2i32_array_array:120:8:&int3:0"; + +; HSAIL64: prog readonly_u32 &p3i32_array_array[16] = u32[](0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:0:4:%lds.int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:4:4:%lds.int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:8:4:%lds.int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:12:4:%lds.int3:0"; +; HSAIL64: pragma 
"initvarwithaddress:&p3i32_array_array:16:4:%lds.int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:20:4:%lds.int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:24:4:%lds.int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:28:4:%lds.int3:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:32:4:%lds.int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:36:4:%lds.int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:40:4:%lds.int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:44:4:%lds.int3:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:48:4:%lds.int0:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:52:4:%lds.int1:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:56:4:%lds.int2:0"; +; HSAIL64: pragma "initvarwithaddress:&p3i32_array_array:60:4:%lds.int3:0"; + + +; HSAIL64: prog readonly_u32 &ptr_to_int_gv_p3i32 = 0; +; HSAIL64: pragma "initvarwithaddress:&ptr_to_int_gv_p3i32:0:4:%lds.int0:0"; + +; HSAIL64: prog readonly_u32 &ptr_to_int_array_gv_p3i32[4] = u32[](0, 0, 0, 0); +; HSAIL64: pragma "initvarwithaddress:&ptr_to_int_array_gv_p3i32:0:4:%lds.int0:0"; +; HSAIL64: pragma "initvarwithaddress:&ptr_to_int_array_gv_p3i32:4:4:%lds.int1:0"; +; HSAIL64: pragma "initvarwithaddress:&ptr_to_int_array_gv_p3i32:8:4:%lds.int2:0"; +; HSAIL64: pragma "initvarwithaddress:&ptr_to_int_array_gv_p3i32:12:4:%lds.int3:0"; + + +@int0 = internal unnamed_addr addrspace(2) constant i32 9 +@int1 = internal unnamed_addr addrspace(2) constant i32 34 +@int2 = internal unnamed_addr addrspace(2) constant i32 123 +@int3 = internal unnamed_addr addrspace(2) constant i32 222 +@int4 = internal unnamed_addr addrspace(2) constant i32 2424 + +@symbol_array = internal addrspace(2) constant [5 x i32 addrspace(2)*] [ i32 addrspace(2)* @int0, i32 addrspace(2)* @int1, i32 addrspace(2)* @int2, i32 addrspace(2)* @int3, i32 addrspace(2)* @int4 ] +@i32_array = internal unnamed_addr addrspace(2) constant [10 x i32] [i32 234, i32 456, i32 789, i32 235, i32 495, i32 1, i32 4, i32 9, i32 10, i32 453], align 4 + + +@constantexpr_address_array = internal addrspace(2) constant [5 x i32 addrspace(2)*] [ + i32 addrspace(2)* getelementptr ([10 x i32], [10 x i32] addrspace(2)* @i32_array, i32 0, i32 0), + i32 addrspace(2)* getelementptr ([10 x i32], [10 x i32] addrspace(2)* @i32_array, i32 0, i32 4), + i32 addrspace(2)* getelementptr ([10 x i32], [10 x i32] addrspace(2)* @i32_array, i32 0, i32 8), + i32 addrspace(2)* getelementptr ([10 x i32], [10 x i32] addrspace(2)* @i32_array, i32 0, i32 9), + i32 addrspace(2)* getelementptr ([10 x i32], [10 x i32] addrspace(2)* @i32_array, i32 0, i32 3) +] + +%struct.foo = type { float, [5 x i32 addrspace(2)*] addrspace(2)* } + +%struct.bar = type { i8, [5 x i32 addrspace(2)*] addrspace(2)*, i8 } +%struct.packed.bar = type <{ i8, [5 x i32 addrspace(2)*] addrspace(2)*, i8 }> + +%struct.mixed.nullptr.sizes = type { i32 addrspace(3)*, i32 addrspace(2)* } + + +@struct_foo_gv = internal unnamed_addr addrspace(2) constant [2 x %struct.foo] [ + %struct.foo { float 16.0, [5 x i32 addrspace(2)*] addrspace(2)* @constantexpr_address_array }, + %struct.foo { float 32.0, [5 x i32 addrspace(2)*] addrspace(2)* @symbol_array } +] + +@struct_bar_gv = internal unnamed_addr addrspace(2) constant [2 x %struct.bar] [ + %struct.bar { i8 7, [5 x i32 addrspace(2)*] addrspace(2)* @constantexpr_address_array, i8 23 }, + %struct.bar { i8 45, [5 x i32 addrspace(2)*] addrspace(2)* @symbol_array, i8 
212 } +] + +@struct_packed_bar_gv = internal unnamed_addr addrspace(2) constant [2 x %struct.packed.bar] [ + %struct.packed.bar <{ i8 7, [5 x i32 addrspace(2)*] addrspace(2)* @constantexpr_address_array, i8 23 }>, + %struct.packed.bar <{ i8 45, [5 x i32 addrspace(2)*] addrspace(2)* @symbol_array, i8 212 }> +] + + +@lds.int0 = internal unnamed_addr addrspace(3) global i32 undef +@lds.int1 = internal unnamed_addr addrspace(3) global i32 undef +@lds.int2 = internal unnamed_addr addrspace(3) global i32 undef +@lds.int3 = internal unnamed_addr addrspace(3) global i32 undef +@lds.int4 = internal unnamed_addr addrspace(3) global i32 undef + + +@struct_mixed_nullptr_sizes_0 = internal unnamed_addr addrspace(2) constant [2 x %struct.mixed.nullptr.sizes] zeroinitializer +@struct_mixed_nullptr_sizes_1 = internal unnamed_addr addrspace(2) constant [2 x %struct.mixed.nullptr.sizes] [ + %struct.mixed.nullptr.sizes { i32 addrspace(3)* null, i32 addrspace(2)* @int1 }, + %struct.mixed.nullptr.sizes { i32 addrspace(3)* @lds.int0, i32 addrspace(2)* null } +] + +@lds_symbol_array = internal addrspace(2) constant [5 x i32 addrspace(3)*] [ i32 addrspace(3)* @lds.int0, i32 addrspace(3)* @lds.int1, i32 addrspace(3)* @lds.int2, i32 addrspace(3)* @lds.int3, i32 addrspace(3)* @lds.int4 ] + + + +@scalar_p2i32 = addrspace(2) constant i32 addrspace(2)* @int0 +@scalar_p3i32 = addrspace(2) constant i32 addrspace(3)* @lds.int0 + +@zero_vector_v4p2i32 = addrspace(2) constant <4 x i32 addrspace(2)*> zeroinitializer +@zero_vector_v3p2i32 = addrspace(2) constant <3 x i32 addrspace(3)*> zeroinitializer + +@p2i32_array = addrspace(2) constant [4 x i32 addrspace(2)*] [ i32 addrspace(2)* @int0, i32 addrspace(2)* @int1, i32 addrspace(2)* @int2, i32 addrspace(2)* @int3] + +@p3i32_array = addrspace(2) constant [4 x i32 addrspace(3)*] [ i32 addrspace(3)* @lds.int0, i32 addrspace(3)* @lds.int1, i32 addrspace(3)* @lds.int2, i32 addrspace(3)* @lds.int3] + +@p2i32_array_array = addrspace(2) constant [4 x [4 x i32 addrspace(2)*]] [ + [4 x i32 addrspace(2)*] [ i32 addrspace(2)* @int0, i32 addrspace(2)* @int1, i32 addrspace(2)* @int2, i32 addrspace(2)* @int3], + [4 x i32 addrspace(2)*] [ i32 addrspace(2)* @int0, i32 addrspace(2)* @int1, i32 addrspace(2)* @int2, i32 addrspace(2)* @int3], + [4 x i32 addrspace(2)*] [ i32 addrspace(2)* @int0, i32 addrspace(2)* @int1, i32 addrspace(2)* @int2, i32 addrspace(2)* @int3], + [4 x i32 addrspace(2)*] [ i32 addrspace(2)* @int0, i32 addrspace(2)* @int1, i32 addrspace(2)* @int2, i32 addrspace(2)* @int3] +] + +@p3i32_array_array = addrspace(2) constant [4 x [4 x i32 addrspace(3)*]] [ + [4 x i32 addrspace(3)*] [ i32 addrspace(3)* @lds.int0, i32 addrspace(3)* @lds.int1, i32 addrspace(3)* @lds.int2, i32 addrspace(3)* @lds.int3], + [4 x i32 addrspace(3)*] [ i32 addrspace(3)* @lds.int0, i32 addrspace(3)* @lds.int1, i32 addrspace(3)* @lds.int2, i32 addrspace(3)* @lds.int3], + [4 x i32 addrspace(3)*] [ i32 addrspace(3)* @lds.int0, i32 addrspace(3)* @lds.int1, i32 addrspace(3)* @lds.int2, i32 addrspace(3)* @lds.int3], + [4 x i32 addrspace(3)*] [ i32 addrspace(3)* @lds.int0, i32 addrspace(3)* @lds.int1, i32 addrspace(3)* @lds.int2, i32 addrspace(3)* @lds.int3] +] + +@ptr_to_int_gv_p3i32 = addrspace(2) constant i32 ptrtoint (i32 addrspace(3)* @lds.int0 to i32) + +@ptr_to_int_array_gv_p3i32 = addrspace(2) constant [4 x i32] [ + i32 ptrtoint (i32 addrspace(3)* @lds.int0 to i32), + i32 ptrtoint (i32 addrspace(3)* @lds.int1 to i32), + i32 ptrtoint (i32 addrspace(3)* @lds.int2 to i32), + i32 ptrtoint (i32 addrspace(3)* 
@lds.int3 to i32) + ] + + +; HSAIL-LABEL: {{^}}prog function &test_symbol_array( +define void @test_symbol_array(i32 addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [5 x i32 addrspace(2)*], [5 x i32 addrspace(2)*] addrspace(2)* @symbol_array, i32 0, i32 %index + %ptr = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(2)* %tmp0 + %tmp1 = load i32, i32 addrspace(2)* %ptr + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_constantexpr_address_array( +define void @test_constantexpr_address_array(i32 addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [5 x i32 addrspace(2)*], [5 x i32 addrspace(2)*] addrspace(2)* @constantexpr_address_array, i32 0, i32 %index + %ptr = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(2)* %tmp0 + %tmp1 = load i32, i32 addrspace(2)* %ptr + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_struct_foo_gv( +define void @test_struct_foo_gv(i32 addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [2 x %struct.foo], [2 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 %index, i32 1 + %tmp1 = load [5 x i32 addrspace(2)*] addrspace(2)*, [5 x i32 addrspace(2)*] addrspace(2)* addrspace(2)* %tmp0 + %tmp2 = getelementptr [5 x i32 addrspace(2)*], [5 x i32 addrspace(2)*] addrspace(2)* %tmp1, i32 0, i32 1 + %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(2)* %tmp2 + %load = load i32, i32 addrspace(2)* %tmp3 + store i32 %load, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_struct_bar_gv( +define void @test_struct_bar_gv(i32 addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [2 x %struct.bar], [2 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 %index, i32 1 + %tmp1 = load [5 x i32 addrspace(2)*] addrspace(2)*, [5 x i32 addrspace(2)*] addrspace(2)* addrspace(2)* %tmp0 + %tmp2 = getelementptr [5 x i32 addrspace(2)*], [5 x i32 addrspace(2)*] addrspace(2)* %tmp1, i32 0, i32 1 + %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(2)* %tmp2 + %load = load i32, i32 addrspace(2)* %tmp3 + store i32 %load, i32 addrspace(1)* %out + ret void +} + +; FIXME: First load using wrong alignment? 
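+; (Layout note: the pointer member of %struct.packed.bar sits at byte offset 1 of each packed element, so only 1-byte alignment is actually guaranteed for the first load here.)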
+ +; HSAIL-LABEL: {{^}}prog function &test_packed_struct_bar_gv( +; HSAIL32: mul_u32 [[PTR0:\$s[0-9]+]], $s{{[0-9]+}}, 6; +; HSAIL32: ld_readonly_align(4)_u32 [[PTR1:\$s[0-9]+]], [&struct_packed_bar_gv]{{\[}}[[PTR0]]+1]; +; HSAIL32-NEXT: ld_readonly_align(4)_u32 [[PTR2:\$s[0-9]+]], {{\[}}[[PTR1]]+8]; +; HSAIL32-NEXT: ld_readonly_align(4)_u32 [[VAL:\$s[0-9]+]], {{\[}}[[PTR2]]{{\]}}; +; HSAIL32: st_global_align(4)_u32 [[VAL]] +define void @test_packed_struct_bar_gv(i32 addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [2 x %struct.packed.bar], [2 x %struct.packed.bar] addrspace(2)* @struct_packed_bar_gv, i32 0, i32 %index, i32 1 + %tmp1 = load [5 x i32 addrspace(2)*] addrspace(2)*, [5 x i32 addrspace(2)*] addrspace(2)* addrspace(2)* %tmp0 + %tmp2 = getelementptr [5 x i32 addrspace(2)*], [5 x i32 addrspace(2)*] addrspace(2)* %tmp1, i32 0, i32 2 + %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(2)* %tmp2 + %load = load i32, i32 addrspace(2)* %tmp3 + store i32 %load, i32 addrspace(1)* %out + ret void +} + + +; HSAIL-LABEL: {{^}}prog function &test_mixed_nullptr_sizes_0( +define void @test_mixed_nullptr_sizes_0(i32 addrspace(2)* %arg) { + %gep = getelementptr [2 x %struct.mixed.nullptr.sizes], [2 x %struct.mixed.nullptr.sizes] addrspace(2)* @struct_mixed_nullptr_sizes_0, i32 0, i32 1, i32 1 + %load = load volatile i32 addrspace(2)*, i32 addrspace(2)* addrspace(2)* %gep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_mixed_nullptr_sizes_1( +define void @test_mixed_nullptr_sizes_1(i32 addrspace(2)* %arg) { + %gep = getelementptr [2 x %struct.mixed.nullptr.sizes], [2 x %struct.mixed.nullptr.sizes] addrspace(2)* @struct_mixed_nullptr_sizes_1, i32 0, i32 1, i32 0 + %load = load volatile i32 addrspace(3)*, i32 addrspace(3)* addrspace(2)* %gep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_lds_symbol_array( +define void @test_lds_symbol_array(i32 addrspace(1)* %out, i32 %index) { + %tmp0 = getelementptr inbounds [5 x i32 addrspace(3)*], [5 x i32 addrspace(3)*] addrspace(2)* @lds_symbol_array, i32 0, i32 %index + %ptr = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(2)* %tmp0 + %tmp1 = load i32, i32 addrspace(3)* %ptr + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/hyphen-in-func-name.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/hyphen-in-func-name.ll @@ -0,0 +1,6 @@ +; XFAIL: * +; RUN: llc -march=hsail < %s + +define void @has-hyphens-in-name() { + ret void +} Index: test/CodeGen/HSAIL/i1-argument-ext.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/i1-argument-ext.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=hsail < %s + + +; HSAIL: decl prog function &i1_sext_ret(arg_s32 %ret)(); + +; HSAIL: decl prog function &i1_zext_ret(arg_u32 %ret)(); + +; HSAIL-LABEL: {{^}}prog function &i1_sext_ret(arg_s32 %i1_sext_ret)() +; HSAIL: st_arg_u32 {{\$s[0-9]+}}, [%i1_sext_ret]; +define signext i1 @i1_sext_ret() { + ret i1 true +} + +; HSAIL-LABEL: {{^}}prog function &i1_zext_ret(arg_u32 %i1_zext_ret)() +; HSAIL: st_arg_u32 {{\$s[0-9]+}}, [%i1_zext_ret]; +define zeroext i1 @i1_zext_ret() { + ret i1 true +} + +define zeroext i8 @i8_zext_ret() { + ret i8 123 +} + +define zeroext i8 @i8_zext_ret_arg(i8 %x) { + ret i8 %x +} Index: test/CodeGen/HSAIL/i1-argument.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/i1-argument.ll @@ -0,0 +1,26 @@ +; RUN: llc -debug 
-march=hsail -verify-machineinstrs < %s + +; This hit an assert with -debug-only + +; HSAIL: decl prog function &i1_sext_arg()(arg_u8 %x); +; HSAIL: decl prog function &i1_zext_arg()(arg_u8 %x); +; HSAIL: decl prog function &i1_arg()(arg_u8 %x); + + +; HSAIL-LABEL: prog function &i1_sext_arg()(arg_s8 %x) +; HSAIL: ld_arg_s8 {{\$s[0-9]+}}, [%x]; +define i1 @i1_sext_arg(i1 signext %x) { + ret i1 %x +} + +; HSAIL-LABEL: prog function &i1_zext_arg()(arg_u8 %x) +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%x]; +define i1 @i1_zext_arg(i1 zeroext %x) { + ret i1 %x +} + +; HSAIL-LABEL: prog function &i1_arg()(arg_u8 %x) +; HSAIL: ld_arg_u8 {{\$s[0-9]+}}, [%x]; +define i1 @i1_arg(i1 %x) { + ret i1 %x +} Index: test/CodeGen/HSAIL/i8-to-double-to-float.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/i8-to-double-to-float.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=hsail -filetype=asm < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test +; HSAIL: cvt_f64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_ftz_f32_f64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) { + %tmp1 = load i8, i8 addrspace(1)* %in + %tmp2 = uitofp i8 %tmp1 to double + %tmp3 = fptrunc double %tmp2 to float + store float %tmp3, float addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/icmp-select-sete-reverse-args.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/icmp-select-sete-reverse-args.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 0, 4294967295; +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %tmp0 = load i32, i32 addrspace(1)* %in + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 + %tmp1 = load i32, i32 addrspace(1)* %arrayidx1 + %cmp = icmp eq i32 %tmp0, %tmp1 + %value = select i1 %cmp, i32 0, i32 -1 + store i32 %value, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/icmp64.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/icmp64.ll @@ -0,0 +1,100 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_i64_eq +; HSAIL: cmp_eq_b1_s64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp eq i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_ne +; HSAIL: cmp_ne_b1_s64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp ne i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_slt +; HSAIL: cmp_lt_b1_s64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp slt i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} +; FUNC-LABEL: {{^}}prog function &test_i64_ult +; 
HSAIL: cmp_lt_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp ult i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_sle +; HSAIL: cmp_le_b1_s64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp sle i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_ule +; HSAIL: cmp_le_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp ule i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_sgt +; HSAIL: cmp_gt_b1_s64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp sgt i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_ugt +; HSAIL: cmp_gt_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp ugt i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_sge +; HSAIL: cmp_ge_b1_s64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp sge i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_i64_uge +; HSAIL: cmp_ge_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cmov_b32 +define void @test_i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %cmp = icmp uge i64 %a, %b + %result = sext i1 %cmp to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} Index: test/CodeGen/HSAIL/imm.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/imm.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &i64_imm_inline_lo +; HSAIL: st_global_align(8)_u64 1311768464867721221, [$s{{[0-9]+}}]; +define void @i64_imm_inline_lo(i64 addrspace(1) *%out) { + store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005 + ret void +} + +; FUNC-LABEL: {{^}}prog function &i64_imm_inline_hi +; HSAIL: st_global_align(8)_u64 21780256376, [$s{{[0-9]+}}]; +define void @i64_imm_inline_hi(i64 addrspace(1) *%out) { +entry: + store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678 + ret void +} Index: test/CodeGen/HSAIL/indent-call.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/indent-call.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=HSAIL %s + +declare i32 @foo(i32 %x) #0 + +; HSAIL-LABEL: {{^}}prog function &call_foo(arg_u32 %call_foo)() +; HSAIL-NEXT: {{^}}{ +; 
HSAIL: {{^}} mov_b32 $s0, 123; +; HSAIL: {{^}} { +; HSAIL-NEXT: {{^}} arg_u32 %foo; +; HSAIL-NEXT: {{^}} arg_u32 %x; +; HSAIL-NEXT: {{^}} st_arg_align(4)_u32 $s0, [%x]; +; HSAIL-NEXT: {{^}} call &foo (%foo) (%x); +; HSAIL-NEXT: {{^}} ld_arg_align(4)_u32 $s0, [%foo]; +; HSAIL-NEXT: {{^}} } +; HSAIL-NEXT: {{^}} st_arg_align(4)_u32 $s0, [%call_foo]; +; HSAIL-NEXT: {{^}} ret; +; HSAIL-NEXT:{{^}}}; +define i32 @call_foo() #0 { + %ret = call i32 @foo(i32 123) #0 + ret i32 %ret +} + + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/ineg.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ineg.ll @@ -0,0 +1,95 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &ineg_s32( +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @ineg_s32(i32 addrspace(1)* %out, i32 %in) { + %ineg = sub i32 0, %in + store i32 %ineg, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_v2i32( +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @ineg_v2i32(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) { + %ineg = sub <2 x i32> zeroinitializer, %in + store <2 x i32> %ineg, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_v4i32( +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @ineg_v4i32(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) { + %ineg = sub <4 x i32> zeroinitializer, %in + store <4 x i32> %ineg, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_free_i32( +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @ineg_free_i32(i32 addrspace(1)* %out, i32 %in) { + %bc = bitcast i32 %in to i32 + %sub = sub i32 0, %bc + store i32 %sub, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_fold_i32( +; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @ineg_fold_i32(i32 addrspace(1)* %out, i32 %in) { + %sub = sub i32 0, %in + %mul = mul i32 %sub, %in + store i32 %mul, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_s64( +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @ineg_s64(i64 addrspace(1)* %out, i64 %in) { + %ineg = sub i64 0, %in + store i64 %ineg, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_v2i64( +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @ineg_v2i64(<2 x i64> addrspace(1)* nocapture %out, <2 x i64> %in) { + %ineg = sub <2 x i64> zeroinitializer, %in + store <2 x i64> %ineg, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_v4i64( +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @ineg_v4i64(<4 x i64> addrspace(1)* nocapture %out, <4 x i64> %in) { + %ineg = sub <4 x i64> zeroinitializer, %in + store <4 x i64> %ineg, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_free_i64( +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @ineg_free_i64(i64 addrspace(1)* %out, i64 %in) { + %bc 
= bitcast i64 %in to i64 + %sub = sub i64 0, %bc + store i64 %sub, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ineg_fold_i64( +; HSAIL: neg_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @ineg_fold_i64(i64 addrspace(1)* %out, i64 %in) { + %sub = sub i64 0, %in + %mul = mul i64 %sub, %in + store i64 %mul, i64 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/infinite-loop.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/infinite-loop.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &infinite_loop( +; HSAIL: ld_arg +; HSAIL: {{^@BB0_1:}} +; HSAIL: st_global_align(4)_u32 +; HSAIL-NEXT: br @BB0_1 +; HSAIL: }; +define void @infinite_loop(i32 addrspace(1)* %out) { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + store i32 999, i32 addrspace(1)* %out, align 4 + br label %for.body +} Index: test/CodeGen/HSAIL/int32to64.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/int32to64.ll @@ -0,0 +1,84 @@ +; XFAIL: * +; RUN: opt -O3 -march=hsail-64 < %s | llc -march=hsail-64 | FileCheck %s + +target triple = "hsail64-pc-unknown-amdopencl" + +declare i64 @get_group_id(i32) nounwind readnone + +define void @__OpenCL_non_profitable_kernel(i32 addrspace(1)* noalias %rowDelimiters, float addrspace(1)* noalias %out) nounwind { +; CHECK-LABEL: __OpenCL_non_profitable_kernel +; CHECK: {{add_u32 \$s[0-9]+, \$s[0-9]+, 5;}} +; CHECK: {{ld_global_.*_u32 .*, \[\$d[0-9]+\];}} +entry: + %rowDelimiters.addr = alloca i32 addrspace(1)*, align 8 + %out.addr = alloca float addrspace(1)*, align 8 + %aaa = alloca i32, align 4 + %t = alloca i32, align 4 + store i32 addrspace(1)* %rowDelimiters, i32 addrspace(1)** %rowDelimiters.addr, align 8 + store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8 + %call = call i64 @get_group_id(i32 0) nounwind + %conv = trunc i64 %call to i32 + store i32 %conv, i32* %aaa, align 4 + %tmp = load i32 addrspace(1)*, i32 addrspace(1)** %rowDelimiters.addr, align 8 + %tmp1 = load i32, i32* %aaa, align 4 + %tmp2 = add nsw i32 %tmp1, 5 + %conv3 = sext i32 %tmp2 to i64 + %arrayidx = getelementptr i32, i32 addrspace(1)* %tmp, i64 %conv3 + %tmp4 = load i32, i32 addrspace(1)* %arrayidx, align 4 + store i32 %tmp4, i32* %t, align 4 + %tmp5 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8 + %tmp6 = load i32, i32* %aaa, align 4 + %conv7 = sext i32 %tmp6 to i64 + %arrayidx8 = getelementptr float, float addrspace(1)* %tmp5, i64 %conv7 + %tmp9 = load i32, i32* %t, align 4 + %conv10 = sitofp i32 %tmp9 to float + store float %conv10, float addrspace(1)* %arrayidx8, align 4 + br label %return + +return: ; preds = %entry + ret void +} + +define void @__OpenCL_profitable_kernel(i32 addrspace(1)* noalias %rowDelimiters, float addrspace(1)* noalias %out) nounwind { +; CHECK-LABEL: __OpenCL_profitable_kernel +; CHECK: {{ld_global_.*_u32 .*, \[\$d[0-9]+\+20\];}} +; CHECK: {{ld_global_.*_u32 .*, \[\$d[0-9]+\+40\];}} +entry: + %rowDelimiters.addr = alloca i32 addrspace(1)*, align 8 + %out.addr = alloca float addrspace(1)*, align 8 + %aaa = alloca i32, align 4 + %t = alloca i32, align 4 + %t2 = alloca i32, align 4 + store i32 addrspace(1)* %rowDelimiters, i32 addrspace(1)** %rowDelimiters.addr, align 8 + store float addrspace(1)* %out, 
float addrspace(1)** %out.addr, align 8 + %call = call i64 @get_group_id(i32 0) nounwind + %conv = trunc i64 %call to i32 + store i32 %conv, i32* %aaa, align 4 + %tmp = load i32 addrspace(1)*, i32 addrspace(1)** %rowDelimiters.addr, align 8 + %tmp1 = load i32, i32* %aaa, align 4 + %tmp2 = add nsw i32 %tmp1, 5 + %conv3 = sext i32 %tmp2 to i64 + %arrayidx = getelementptr i32, i32 addrspace(1)* %tmp, i64 %conv3 + %tmp4 = load i32, i32 addrspace(1)* %arrayidx, align 4 + store i32 %tmp4, i32* %t, align 4 + %tmp5 = load i32 addrspace(1)*, i32 addrspace(1)** %rowDelimiters.addr, align 8 + %tmp6 = load i32, i32* %aaa, align 4 + %tmp7 = add nsw i32 %tmp6, 10 + %conv8 = sext i32 %tmp7 to i64 + %arrayidx9 = getelementptr i32, i32 addrspace(1)* %tmp5, i64 %conv8 + %tmp10 = load i32, i32 addrspace(1)* %arrayidx9, align 4 + store i32 %tmp10, i32* %t2, align 4 + %tmp11 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8 + %tmp12 = load i32, i32* %aaa, align 4 + %conv13 = sext i32 %tmp12 to i64 + %arrayidx14 = getelementptr float, float addrspace(1)* %tmp11, i64 %conv13 + %tmp15 = load i32, i32* %t, align 4 + %tmp16 = load i32, i32* %t2, align 4 + %tmp17 = add nsw i32 %tmp15, %tmp16 + %conv18 = sitofp i32 %tmp17 to float + store float %conv18, float addrspace(1)* %arrayidx14, align 4 + br label %return + +return: ; preds = %entry + ret void +} Index: test/CodeGen/HSAIL/kernarg-align.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/kernarg-align.ll @@ -0,0 +1,9 @@ +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog kernel &no_align_on_kernarg( +; HSAIL-NOT: align(4) +; HSAIL: kernarg_u64 +define spir_kernel void @no_align_on_kernarg(i32 addrspace(1)* %out) { + store i32 999, i32 addrspace(1)* %out, align 4 + ret void +} Index: test/CodeGen/HSAIL/kernel-args.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/kernel-args.ll @@ -0,0 +1,494 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog kernel &i8_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_u8 %in) +; HSAIL-NEXT: { +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 255; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +define spir_kernel void @i8_kernarg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { + %tmp0 = zext i8 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &i8_zext_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_u8 %in) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in]; +define spir_kernel void @i8_zext_kernarg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { + %tmp0 = zext i8 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &i8_sext_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_s8 %in) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_width(all)_s8 {{\$s[0-9]+}}, [%in]; +define spir_kernel void @i8_sext_kernarg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { + %tmp0 = sext i8 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel 
&i16_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_u16 %in) +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 65535; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +define spir_kernel void @i16_kernarg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { + %tmp0 = zext i16 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &i16_zext_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_u16 %in) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in]; +define spir_kernel void @i16_zext_kernarg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { + %tmp0 = zext i16 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &i16_sext_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_s16 %in) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(2)_width(all)_s16 {{\$s[0-9]+}}, [%in]; +define spir_kernel void @i16_sext_kernarg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { + %tmp0 = sext i16 %in to i32 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &i32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_u32 %in) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in]; +define spir_kernel void @i32_kernarg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { + store i32 %in, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &f32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_f32 %in) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in]; +define spir_kernel void @f32_kernarg(float addrspace(1)* nocapture %out, float %in) nounwind { + store float %in, float addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v2i8_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(2) kernarg_u8 %in[2]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][1]; +define spir_kernel void @v2i8_kernarg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { + store <2 x i8> %in, <2 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v2i16_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(4) kernarg_u16 %in[2]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][2]; +define spir_kernel void @v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { + store <2 x i16> %in, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v2i32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(8) kernarg_u32 %in[2]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][4]; +define spir_kernel void @v2i32_kernarg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind 
{ + store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v2f32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(8) kernarg_f32 %in[2]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][4]; +define spir_kernel void @v2f32_kernarg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { + store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v3i8_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(4) kernarg_u8 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][2]; +define spir_kernel void @v3i8_kernarg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { + store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v3i16_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(8) kernarg_u16 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: shl_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 16; +; HSAIL: and_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 65535; +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define spir_kernel void @v3i16_kernarg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { + store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v3i32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(16) kernarg_u32 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][8]; + +; HSAIL-DAG: st_global_align(4)_u64 {{\$d[0-9]+}}, {{\[}}[[OUT]]{{\]}}; +; HSAIL-DAG: st_global_align(4)_u32 {{\$s[0-9]+}}, {{\[}}[[OUT]]+8]; + +; XHSAIL: pack_u32x2_u32 {{\$d[0-9]+}}, _u32x2(0,0), {{\$s[0-9]+}}, 1; +; XHSAIL: ld_kernarg_align(16)_u32 {{\$s[0-9]+}}, [%in]; +; XHSAIL: cvt_u64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; XHSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; + +; HSAIL: ret; +define spir_kernel void @v3i32_kernarg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { + store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v3f32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(16) kernarg_f32 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][8]; + +; XHSAIL: pack_u32x2_u32 {{\$d[0-9]+}}, _u32x2(0,0), {{\$s[0-9]+}}, 1; +; XHSAIL: cvt_u64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; XHSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; + +; HSAIL: ret; +define spir_kernel void @v3f32_kernarg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { 
+ store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v4i8_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(4) kernarg_u8 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][2]; +define spir_kernel void @v4i8_kernarg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { + store <4 x i8> %in, <4 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v4i16_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(8) kernarg_u16 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][6]; +define spir_kernel void @v4i16_kernarg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { + store <4 x i16> %in, <4 x i16> addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}prog kernel &v4i32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(16) kernarg_u32 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][12]; +define spir_kernel void @v4i32_kernarg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { + store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v4f32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(16) kernarg_f32 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][12]; +define spir_kernel void @v4f32_kernarg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { + store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 + ret void +} + + +; FUNC-LABEL: {{^}}prog kernel &v8i8_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(8) kernarg_u8 %in[8]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][1]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][3]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][5]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][6]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][7]; +; HSAIL: ret; +define spir_kernel void @v8i8_kernarg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { + store <8 x i8> %in, <8 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v8i16_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(16) kernarg_u16 %in[8]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: 
ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][6]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][10]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][14]; +define spir_kernel void @v8i16_kernarg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { + store <8 x i16> %in, <8 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v8i32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(32) kernarg_u32 %in[8]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][16]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][20]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][24]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][28]; +define spir_kernel void @v8i32_kernarg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { + store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v8f32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(32) kernarg_f32 %in[8]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][16]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][20]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][24]; +; HSAIL: ld_kernarg_align(4)_width(all)_f32 {{\$s[0-9]+}}, [%in][28]; +define spir_kernel void @v8f32_kernarg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { + store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v16i8_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(16) kernarg_u8 %in[16]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][3]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][5]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][6]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][7]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][9]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][10]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][11]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, 
[%in][13]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][14]; +; HSAIL: ld_kernarg_width(all)_u8 {{\$s[0-9]+}}, [%in][15]; +define spir_kernel void @v16i8_kernarg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { + store <16 x i8> %in, <16 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v16i16_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(32) kernarg_u16 %in[16]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][2]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][6]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][10]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][14]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][16]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][18]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][20]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][22]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][24]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][26]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][28]; +; HSAIL: ld_kernarg_align(2)_width(all)_u16 {{\$s[0-9]+}}, [%in][30]; +define spir_kernel void @v16i16_kernarg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { + store <16 x i16> %in, <16 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v16i32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(64) kernarg_u32 %in[16]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][4]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][12]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][16]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][20]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][24]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][28]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][32]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][36]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][40]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][44]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][48]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][52]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][56]; +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%in][60]; +; HSAIL: ret; +define spir_kernel void @v16i32_kernarg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { + store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v16f32_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(64) kernarg_f32 %in[16]) +define spir_kernel void @v16f32_kernarg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { + store <16 
x float> %in, <16 x float> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &i64_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_u64 %a) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%a]; +define spir_kernel void @i64_kernarg(i64 addrspace(1)* %out, i64 %a) nounwind { + store i64 %a, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v2i64_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(16) kernarg_u64 %in[2]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][8]; +define spir_kernel void @v2i64_kernarg(<2 x i64> addrspace(1)* nocapture %out, <2 x i64> %in) nounwind { + store <2 x i64> %in, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v3i64_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(32) kernarg_u64 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][16]; +define spir_kernel void @v3i64_kernarg(<3 x i64> addrspace(1)* nocapture %out, <3 x i64> %in) nounwind { + store <3 x i64> %in, <3 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v4i64_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(32) kernarg_u64 %in[4]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][16]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][24]; +define spir_kernel void @v4i64_kernarg(<4 x i64> addrspace(1)* nocapture %out, <4 x i64> %in) nounwind { + store <4 x i64> %in, <4 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &v8i64_kernarg( +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(64) kernarg_u64 %in[8]) +; HSAIL: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%out]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][8]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][16]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][24]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][32]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][40]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][48]; +; HSAIL: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, [%in][56]; +define spir_kernel void @v8i64_kernarg(<8 x i64> addrspace(1)* nocapture %out, <8 x i64> %in) nounwind { + store <8 x i64> %in, <8 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog kernel &array_4xi32_kernarg({{$}} +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: kernarg_u32 %x[4]) +; HSAIL-NEXT: { +; HSAIL-DAG: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%x]; +; HSAIL-DAG: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%x][4]; +; HSAIL-DAG: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%x][8]; +; HSAIL-DAG: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%x][12]; +; 
HSAIL: ret; +define spir_kernel void @array_4xi32_kernarg([4 x i32] addrspace(1)* %out, [4 x i32] %x) nounwind { + store [4 x i32] %x, [4 x i32] addrspace(1)* %out + ret void +} + +%struct.i32pair = type { i32, i32 } + +; FUNC-LABEL: {{^}}prog kernel &struct_kernarg({{$}} +; HSAIL-NEXT: kernarg_u32 %out, +; HSAIL-NEXT: align(4) kernarg_u8 %x[8]) +; HSAIL-DAG: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%x]; +; HSAIL-DAG: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%x][4]; +; HSAIL: ret; +define spir_kernel void @struct_kernarg(%struct.i32pair addrspace(1)* %out, %struct.i32pair %x) { + store %struct.i32pair %x, %struct.i32pair addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/linkage-types.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/linkage-types.ll @@ -0,0 +1,108 @@ +; RUN: llc -march=hsail64 < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL: {{^}}readonly_u32 &private_gv[1] = u32[](1); +@private_gv = private addrspace(2) constant [1 x i32] [i32 1], align 4 + +; HSAIL: {{^}}readonly_u32 &internal_gv[1] = u32[](1); +@internal_gv = internal addrspace(2) constant [1 x i32] [i32 1], align 4 + +; HSAIL: {{^}}prog readonly_u32 &available_externally_gv[1] = u32[](1); +@available_externally_gv = available_externally addrspace(2) constant [1 x i32] [i32 1], align 4 + +; HSAIL: {{^}}readonly_u32 &linkonce_gv[1] = u32[](1); +@linkonce_gv = linkonce addrspace(2) constant [1 x i32] [i32 1], align 4 + +; HSAIL: {{^}}prog readonly_u32 &weak_gv[1] = u32[](1); +@weak_gv = weak addrspace(2) constant [1 x i32] [i32 1], align 4 + +; HSAIL: {{^}}readonly_u32 &common_gv[1] = u32[](0); +@common_gv = common addrspace(2) global [1 x i32] zeroinitializer, align 4 + +; HSAIL: {{^}}prog readonly_u32 &appending_gv[1] = u32[](0); +@appending_gv = appending addrspace(2) global [1 x i32] zeroinitializer, align 4 + +; HSAIL: {{^}}prog readonly_u32 &extern_weak_gv[1]; +@extern_weak_gv = extern_weak addrspace(2) global [1 x i32], align 4 + +; HSAIL: {{^}}readonly_u32 &linkonce_odr_gv[1] = u32[](1); +@linkonce_odr_gv = linkonce_odr addrspace(2) constant [1 x i32] [i32 1], align 4 + +; HSAIL: {{^}}prog readonly_u32 &weak_odr_gv[1] = u32[](1); +@weak_odr_gv = weak_odr addrspace(2) constant [1 x i32] [i32 1], align 4 + +; HSAIL: {{^}}prog readonly_u32 &external_gv[1]; +@external_gv = external addrspace(2) global [1 x i32], align 4 + +; HSAIL: {{^}}decl function &linkonce_odr_func( +; HSAIL: {{^}}function &linkonce_odr_func +define linkonce_odr i32 @linkonce_odr_func(i32) { + ret i32 undef +} + +define i32 @private_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @private_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @internal_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @internal_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @available_externally_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @available_externally_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @linkonce_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @linkonce_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @weak_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @weak_gv, i64 0, i64 undef + %val = load i32, i32 
addrspace(2)* %tmp + ret i32 %val +} + +define i32 @common_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @common_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @appending_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @appending_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @extern_weak_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @extern_weak_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @linkonce_odr_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @linkonce_odr_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @weak_odr_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @weak_odr_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +define i32 @external_gv_user() #0 { + %tmp = getelementptr inbounds [1 x i32], [1 x i32] addrspace(2)* @external_gv, i64 0, i64 undef + %val = load i32, i32 addrspace(2)* %tmp + ret i32 %val +} + +attributes #0 = { nounwind uwtable } Index: test/CodeGen/HSAIL/lit.local.cfg =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'HSAIL' in config.root.targets: + config.unsupported = True Index: test/CodeGen/HSAIL/llvm.hsail.barrier.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.barrier.ll @@ -0,0 +1,80 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &test_barrier_all( +; HSAIL: barrier; +; HSAIL-NEXT: ret; +define void @test_barrier_all(i32 addrspace(1)* %out) #1 { + call void @llvm.hsail.barrier(i32 34) #2 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_barrier_wavesize( +; HSAIL: barrier_width(WAVESIZE); +; HSAIL-NEXT: ret; +define void @test_barrier_wavesize(i32 addrspace(1)* %out) #1 { + call void @llvm.hsail.barrier(i32 33) #2 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_barrier_1( +; HSAIL: barrier_width(1); +; HSAIL-NEXT: ret; +define void @test_barrier_1(i32 addrspace(1)* %out) #1 { + call void @llvm.hsail.barrier(i32 1) #2 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_barrier_64( +; HSAIL: barrier_width(64); +; HSAIL-NEXT: ret; +define void @test_barrier_64(i32 addrspace(1)* %out) #1 { + call void @llvm.hsail.barrier(i32 7) #2 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_barrier_2147483648( +; HSAIL: barrier_width(2147483648); +; HSAIL-NEXT: ret; +define void @test_barrier_2147483648(i32 addrspace(1)* %out) #1 { + call void @llvm.hsail.barrier(i32 32) #2 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_barrier_mem( +; HSAIL: st_global_align(4)_u32 +; HSAIL: barrier; +; HSAIL: ld_global_align(4)_u32 +define void @test_barrier_mem(i32 addrspace(1)* %out) #1 { + %tmp = call i32 @llvm.HSAIL.get.global.id(i32 0) + %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp + store i32 %tmp, i32 addrspace(1)* %tmp1 + call void @llvm.hsail.barrier(i32 34) #1 + %tmp2 = call i32 @llvm.HSAIL.workgroup.size(i32 0) #0 + %tmp3 = sub i32 %tmp2, 1 + %tmp4 = sub i32 %tmp3, %tmp + %tmp5 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp4 + %tmp6 = load i32, i32 addrspace(1)* %tmp5 + store i32 
%tmp6, i32 addrspace(1)* %tmp1 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_barrier_0( +; HSAIL: barrier; +; HSAIL-NEXT: ret; +define void @test_legacy_barrier_0(i32 addrspace(1)* %out) #1 { + call void @llvm.HSAIL.barrier() #1 + ret void +} + +declare void @llvm.HSAIL.barrier() #1 +declare void @llvm.hsail.barrier(i32) #2 + +; Function Attrs: nounwind readnone +declare i32 @llvm.HSAIL.get.global.id(i32) #0 + +; Function Attrs: nounwind readnone +declare i32 @llvm.HSAIL.workgroup.size(i32) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind noduplicate } +attributes #2 = { nounwind noduplicate convergent } Index: test/CodeGen/HSAIL/llvm.hsail.bitalign.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.bitalign.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.bitalign(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.bitalign.b32(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_bitalign_i32( +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_bitalign_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.bitalign(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitalign_i32_rii( +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, 2; +define i32 @test_bitalign_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.bitalign(i32 %x, i32 1, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitalign_i32_rir( +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_bitalign_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.bitalign(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitalign_i32_rri( +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_bitalign_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.bitalign(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitalign_i32_iri( +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, 3, {{\$s[0-9]+}}, 7; +define i32 @test_bitalign_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.bitalign(i32 3, i32 %x, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitalign_i32_iir( +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, 3, 9, {{\$s[0-9]+}}; +define i32 @test_bitalign_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.bitalign(i32 3, i32 9, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitalign_i32_iii( +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, 3, 9, 11; +define i32 @test_bitalign_i32_iii() #0 { + %val = call i32 @llvm.hsail.bitalign(i32 3, i32 9, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_bitalign_i32( +; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_bitalign_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.bitalign.b32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.bitmask.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.bitmask.ll @@ -0,0 +1,35 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.bitmask.i32(i32, i32) #0 +declare i64 
@llvm.hsail.bitmask.i64(i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_bitmask_i32_rr( +; HSAIL: bitmask_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_bitmask_i32_rr(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.bitmask.i32(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitmask_i32_ri( +; HSAIL: bitmask_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_bitmask_i32_ri(i32 %x) #0 { + %val = call i32 @llvm.hsail.bitmask.i32(i32 %x, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitmask_i32_ir( +; HSAIL: bitmask_b32 {{\$s[0-9]+}}, 7, {{\$s[0-9]+}}; +define i32 @test_bitmask_i32_ir(i32 %x) #0 { + %val = call i32 @llvm.hsail.bitmask.i32(i32 7, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitmask_i64_rr( +; HSAIL: bitmask_b64 {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i64 @test_bitmask_i64_rr(i32 %x, i32 %y) #0 { + %val = call i64 @llvm.hsail.bitmask.i64(i32 %x, i32 %y) #0 + ret i64 %val +} + + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.bitrev.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.bitrev.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.bitrev.i32(i32) #0 +declare i64 @llvm.hsail.bitrev.i64(i64) #0 + +; HSAIL-LABEL: {{^}}prog function &test_bitrev_i32( +; HSAIL: bitrev_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_bitrev_i32(i32 %x) #0 { + %val = call i32 @llvm.hsail.bitrev.i32(i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitrev_i64( +; HSAIL: bitrev_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_bitrev_i64(i64 %x) #0 { + %val = call i64 @llvm.hsail.bitrev.i64(i64 %x) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.bitselect.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.bitselect.ll @@ -0,0 +1,38 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.bitselect.i32(i32, i32, i32) #0 +declare i64 @llvm.hsail.bitselect.i64(i64, i64, i64) #0 + +declare i32 @llvm.HSAIL.bitselect.u32(i32, i32, i32) #0 +declare i64 @llvm.HSAIL.bitselect.u64(i64, i64, i64) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_bitselect_u32( +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_bitselect_u32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.bitselect.i32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bitselect_u64( +; HSAIL: bitselect_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_bitselect_u64(i64 %x, i64 %y, i64 %z) #0 { + %val = call i64 @llvm.hsail.bitselect.i64(i64 %x, i64 %y, i64 %z) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_bitselect_u32( +; HSAIL: bitselect_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_bitselect_u32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.bitselect.u32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_bitselect_u64( +; HSAIL: bitselect_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_legacy_bitselect_u64(i64 %x, i64 %y, i64 %z) #0 { + %val = 
call i64 @llvm.HSAIL.bitselect.u64(i64 %x, i64 %y, i64 %z) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.bytealign.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.bytealign.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.bytealign(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.bytealign.b32(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_bytealign_i32( +; HSAIL: bytealign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_bytealign_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.bytealign(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_bytealign_i32( +; HSAIL: bytealign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_bytealign_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.bytealign.b32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.clock.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.clock.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i64 @llvm.hsail.clock() #0 +declare i64 @llvm.HSAIL.get.clock() #1 + +; HSAIL-LABEL: {{^}}prog function &test_clock( +; HSAIL: clock_u64 {{\$d[0-9]+}}; +define void @test_clock(i64 addrspace(1)* %out) #0 { + %tmp0 = call i64 @llvm.hsail.clock() #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_clock( +; HSAIL: clock_u64 {{\$d[0-9]+}}; +define void @test_legacy_clock(i64 addrspace(1)* %out) #0 { + %tmp0 = call i64 @llvm.HSAIL.get.clock() #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.cuid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.cuid.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.cuid() #0 +declare i32 @llvm.HSAIL.get.cu() #1 + +; FUNC-LABEL: {{^}}prog function &test_cuid( +; HSAIL: cuid_u32 {{\$s[0-9]+}}; +define void @test_cuid(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.cuid() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_cu( +; HSAIL: cuid_u32 {{\$s[0-9]+}}; +define void @test_legacy_get_cu(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.cu() #1 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.currentworkgroupsize.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.currentworkgroupsize.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.currentworkgroupsize(i32) #0 +declare i32 @llvm.HSAIL.currentworkgroup.size(i32) #0 + +; FUNC-LABEL: {{^}}prog function &test_currentworkgroupsize_0( +; HSAIL: currentworkgroupsize_u32 {{\$s[0-9]+}}, 0; +define void @test_currentworkgroupsize_0(i32 
addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.currentworkgroupsize(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_currentworkgroupsize_1( +; HSAIL: currentworkgroupsize_u32 {{\$s[0-9]+}}, 1; +define void @test_currentworkgroupsize_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.currentworkgroupsize(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_currentworkgroupsize_2( +; HSAIL: currentworkgroupsize_u32 {{\$s[0-9]+}}, 2; +define void @test_currentworkgroupsize_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.currentworkgroupsize(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &legacy_currentworkgroup_size_0( +; HSAIL: currentworkgroupsize_u32 {{\$s[0-9]+}}, 0; +define void @legacy_currentworkgroup_size_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.currentworkgroup.size(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &legacy_currentworkgroup_size_1( +; HSAIL: currentworkgroupsize_u32 {{\$s[0-9]+}}, 1; +define void @legacy_currentworkgroup_size_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.currentworkgroup.size(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &legacy_currentworkgroup_size_2( +; HSAIL: currentworkgroupsize_u32 {{\$s[0-9]+}}, 2; +define void @legacy_currentworkgroup_size_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.currentworkgroup.size(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.currentworkitemflatid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.currentworkitemflatid.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.currentworkitemflatid() #0 + +; FUNC-LABEL: {{^}}prog function &test_currentworkitemflatid( +; HSAIL: currentworkitemflatid_u32 {{\$s[0-9]+}}; +define void @test_currentworkitemflatid(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.currentworkitemflatid() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.dim.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.dim.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.dim() #0 +declare i32 @llvm.HSAIL.get.work.dim() #0 + +; FUNC-LABEL: {{^}}prog function &test_dim( +; HSAIL: dim_u32 {{\$s[0-9]+}}; +define void @test_dim(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.dim() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_work_dim( +; HSAIL: dim_u32 {{\$s[0-9]+}}; +define void @test_legacy_get_work_dim(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.work.dim() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.fadd.ll 
=================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.fadd.ll @@ -0,0 +1,105 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.hsail.fadd.f32(i1, i32, float, float) #0 +declare double @llvm.hsail.fadd.f64(i1, i32, double, double) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_add_default_f32( +; HSAIL: add_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_add_default_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fadd.f32(i1 false, i32 1, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_near_f32( +; HSAIL: add_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_add_near_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fadd.f32(i1 false, i32 2, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_zero_f32( +; HSAIL: add_zero_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_add_zero_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fadd.f32(i1 false, i32 3, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_up_f32( +; HSAIL: add_up_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_add_up_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fadd.f32(i1 false, i32 4, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_down_f32( +; HSAIL: add_down_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_add_down_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fadd.f32(i1 false, i32 5, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_ftz_default_f32( +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_add_ftz_default_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fadd.f32(i1 true, i32 1, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_ftz_near_even_f32( +; HSAIL: add_ftz_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_add_ftz_near_even_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fadd.f32(i1 true, i32 2, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_default_f64( +; HSAIL: add_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_add_default_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fadd.f64(i1 false, i32 1, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_near_f64( +; HSAIL: add_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_add_near_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fadd.f64(i1 false, i32 2, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_zero_f64( +; HSAIL: add_zero_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_add_zero_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fadd.f64(i1 false, i32 3, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_up_f64( +; HSAIL: add_up_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_add_up_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fadd.f64(i1 false, i32 4, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function 
&test_add_down_f64( +; HSAIL: add_down_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_add_down_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fadd.f64(i1 false, i32 5, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_ftz_default_f64( +; HSAIL: add_ftz_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_add_ftz_default_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fadd.f64(i1 true, i32 1, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_add_ftz_near_even_f64( +; HSAIL: add_ftz_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_add_ftz_near_even_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fadd.f64(i1 true, i32 2, double %x, double %y) #0 + ret double %ret +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.fdiv.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.fdiv.ll @@ -0,0 +1,105 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.hsail.fdiv.f32(i1, i32, float, float) #0 +declare double @llvm.hsail.fdiv.f64(i1, i32, double, double) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_div_default_f32( +; HSAIL: div_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_div_default_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fdiv.f32(i1 false, i32 1, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_near_f32( +; HSAIL: div_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_div_near_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fdiv.f32(i1 false, i32 2, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_zero_f32( +; HSAIL: div_zero_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_div_zero_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fdiv.f32(i1 false, i32 3, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_up_f32( +; HSAIL: div_up_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_div_up_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fdiv.f32(i1 false, i32 4, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_down_f32( +; HSAIL: div_down_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_div_down_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fdiv.f32(i1 false, i32 5, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_ftz_default_f32( +; HSAIL: div_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_div_ftz_default_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fdiv.f32(i1 true, i32 1, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_ftz_near_even_f32( +; HSAIL: div_ftz_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_div_ftz_near_even_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fdiv.f32(i1 true, i32 2, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_default_f64( +; HSAIL: div_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_div_default_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fdiv.f64(i1 false, i32 1, 
double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_near_f64( +; HSAIL: div_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_div_near_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fdiv.f64(i1 false, i32 2, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_zero_f64( +; HSAIL: div_zero_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_div_zero_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fdiv.f64(i1 false, i32 3, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_up_f64( +; HSAIL: div_up_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_div_up_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fdiv.f64(i1 false, i32 4, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_down_f64( +; HSAIL: div_down_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_div_down_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fdiv.f64(i1 false, i32 5, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_ftz_default_f64( +; HSAIL: div_ftz_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_div_ftz_default_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fdiv.f64(i1 true, i32 1, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_div_ftz_near_even_f64( +; HSAIL: div_ftz_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_div_ftz_near_even_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fdiv.f64(i1 true, i32 2, double %x, double %y) #0 + ret double %ret +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.fmul.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.fmul.ll @@ -0,0 +1,105 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.hsail.fmul.f32(i1, i32, float, float) #0 +declare double @llvm.hsail.fmul.f64(i1, i32, double, double) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_mul_default_f32( +; HSAIL: mul_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_mul_default_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fmul.f32(i1 false, i32 1, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_near_f32( +; HSAIL: mul_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_mul_near_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fmul.f32(i1 false, i32 2, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_zero_f32( +; HSAIL: mul_zero_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_mul_zero_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fmul.f32(i1 false, i32 3, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_up_f32( +; HSAIL: mul_up_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_mul_up_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fmul.f32(i1 false, i32 4, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_down_f32( +; HSAIL: mul_down_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_mul_down_f32(float %x, float %y) #0 
{ + %ret = call float @llvm.hsail.fmul.f32(i1 false, i32 5, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_ftz_default_f32( +; HSAIL: mul_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_mul_ftz_default_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fmul.f32(i1 true, i32 1, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_ftz_near_even_f32( +; HSAIL: mul_ftz_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_mul_ftz_near_even_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fmul.f32(i1 true, i32 2, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_default_f64( +; HSAIL: mul_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_mul_default_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fmul.f64(i1 false, i32 1, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_near_f64( +; HSAIL: mul_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_mul_near_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fmul.f64(i1 false, i32 2, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_zero_f64( +; HSAIL: mul_zero_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_mul_zero_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fmul.f64(i1 false, i32 3, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_up_f64( +; HSAIL: mul_up_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_mul_up_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fmul.f64(i1 false, i32 4, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_down_f64( +; HSAIL: mul_down_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_mul_down_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fmul.f64(i1 false, i32 5, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_ftz_default_f64( +; HSAIL: mul_ftz_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_mul_ftz_default_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fmul.f64(i1 true, i32 1, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_mul_ftz_near_even_f64( +; HSAIL: mul_ftz_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_mul_ftz_near_even_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fmul.f64(i1 true, i32 2, double %x, double %y) #0 + ret double %ret +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.fsqrt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.fsqrt.ll @@ -0,0 +1,105 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.hsail.fsqrt.f32(i1, i32, float) #0 +declare double @llvm.hsail.fsqrt.f64(i1, i32, double) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_default_f32( +; HSAIL: sqrt_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sqrt_default_f32(float %x) #0 { + %ret = call float @llvm.hsail.fsqrt.f32(i1 false, i32 1, float %x) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_near_f32( +; HSAIL: sqrt_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; 
+define float @test_sqrt_near_f32(float %x) #0 { + %ret = call float @llvm.hsail.fsqrt.f32(i1 false, i32 2, float %x) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_zero_f32( +; HSAIL: sqrt_zero_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sqrt_zero_f32(float %x) #0 { + %ret = call float @llvm.hsail.fsqrt.f32(i1 false, i32 3, float %x) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_up_f32( +; HSAIL: sqrt_up_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sqrt_up_f32(float %x) #0 { + %ret = call float @llvm.hsail.fsqrt.f32(i1 false, i32 4, float %x) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_down_f32( +; HSAIL: sqrt_down_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sqrt_down_f32(float %x) #0 { + %ret = call float @llvm.hsail.fsqrt.f32(i1 false, i32 5, float %x) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_ftz_default_f32( +; HSAIL: sqrt_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sqrt_ftz_default_f32(float %x) #0 { + %ret = call float @llvm.hsail.fsqrt.f32(i1 true, i32 1, float %x) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_ftz_near_even_f32( +; HSAIL: sqrt_ftz_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sqrt_ftz_near_even_f32(float %x) #0 { + %ret = call float @llvm.hsail.fsqrt.f32(i1 true, i32 2, float %x) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_default_f64( +; HSAIL: sqrt_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sqrt_default_f64(double %x) #0 { + %ret = call double @llvm.hsail.fsqrt.f64(i1 false, i32 1, double %x) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_near_f64( +; HSAIL: sqrt_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sqrt_near_f64(double %x) #0 { + %ret = call double @llvm.hsail.fsqrt.f64(i1 false, i32 2, double %x) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_zero_f64( +; HSAIL: sqrt_zero_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sqrt_zero_f64(double %x) #0 { + %ret = call double @llvm.hsail.fsqrt.f64(i1 false, i32 3, double %x) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_up_f64( +; HSAIL: sqrt_up_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sqrt_up_f64(double %x) #0 { + %ret = call double @llvm.hsail.fsqrt.f64(i1 false, i32 4, double %x) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_down_f64( +; HSAIL: sqrt_down_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sqrt_down_f64(double %x) #0 { + %ret = call double @llvm.hsail.fsqrt.f64(i1 false, i32 5, double %x) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_ftz_default_f64( +; HSAIL: sqrt_ftz_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sqrt_ftz_default_f64(double %x) #0 { + %ret = call double @llvm.hsail.fsqrt.f64(i1 true, i32 1, double %x) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sqrt_ftz_near_even_f64( +; HSAIL: sqrt_ftz_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sqrt_ftz_near_even_f64(double %x) #0 { + %ret = call double @llvm.hsail.fsqrt.f64(i1 true, i32 2, double %x) #0 + ret double %ret +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.fsub.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.fsub.ll @@ -0,0 +1,105 @@ +; RUN: llc -march=hsail < %s | FileCheck 
-check-prefix=HSAIL %s + +declare float @llvm.hsail.fsub.f32(i1, i32, float, float) #0 +declare double @llvm.hsail.fsub.f64(i1, i32, double, double) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_sub_default_f32( +; HSAIL: sub_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sub_default_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fsub.f32(i1 false, i32 1, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_near_f32( +; HSAIL: sub_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sub_near_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fsub.f32(i1 false, i32 2, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_zero_f32( +; HSAIL: sub_zero_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sub_zero_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fsub.f32(i1 false, i32 3, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_up_f32( +; HSAIL: sub_up_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sub_up_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fsub.f32(i1 false, i32 4, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_down_f32( +; HSAIL: sub_down_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sub_down_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fsub.f32(i1 false, i32 5, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_ftz_default_f32( +; HSAIL: sub_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sub_ftz_default_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fsub.f32(i1 true, i32 1, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_ftz_near_even_f32( +; HSAIL: sub_ftz_near_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_sub_ftz_near_even_f32(float %x, float %y) #0 { + %ret = call float @llvm.hsail.fsub.f32(i1 true, i32 2, float %x, float %y) #0 + ret float %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_default_f64( +; HSAIL: sub_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sub_default_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fsub.f64(i1 false, i32 1, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_near_f64( +; HSAIL: sub_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sub_near_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fsub.f64(i1 false, i32 2, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_zero_f64( +; HSAIL: sub_zero_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sub_zero_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fsub.f64(i1 false, i32 3, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_up_f64( +; HSAIL: sub_up_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sub_up_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fsub.f64(i1 false, i32 4, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_down_f64( +; HSAIL: sub_down_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sub_down_f64(double %x, double %y) #0 { + %ret = call double 
@llvm.hsail.fsub.f64(i1 false, i32 5, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_ftz_default_f64( +; HSAIL: sub_ftz_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sub_ftz_default_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fsub.f64(i1 true, i32 1, double %x, double %y) #0 + ret double %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sub_ftz_near_even_f64( +; HSAIL: sub_ftz_near_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_sub_ftz_near_even_f64(double %x, double %y) #0 { + %ret = call double @llvm.hsail.fsub.f64(i1 true, i32 2, double %x, double %y) #0 + ret double %ret +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.gcn.bfm.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.gcn.bfm.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.gcn.bfm(i32, i32) #0 +declare i32 @llvm.HSAIL.bfm(i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_bfm_i32( +; HSAIL: gcn_bfm_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_bfm_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.gcn.bfm(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bfm_i32_ri( +; HSAIL: gcn_bfm_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_bfm_i32_ri(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.bfm(i32 %x, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bfm_i32_ir( +; HSAIL: gcn_bfm_b32 {{\$s[0-9]+}}, 3, {{\$s[0-9]+}}; +define i32 @test_bfm_i32_ir(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.bfm(i32 3, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_bfm_i32_ii( +; HSAIL: gcn_bfm_b32 {{\$s[0-9]+}}, 6, 9; +define i32 @test_bfm_i32_ii() #0 { + %val = call i32 @llvm.hsail.gcn.bfm(i32 6, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_bfm_i32( +; HSAIL: gcn_bfm_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_bfm_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.bfm(i32 %x, i32 %y) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.gcn.mqsad.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.gcn.mqsad.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i64 @llvm.hsail.gcn.mqsad(i64, i32, i64) #0 +declare i64 @llvm.HSAIL.mqsad(i64, i32, i64) #0 + +; HSAIL-LABEL: {{^}}prog function &test_mqsad_i64( +; HSAIL: gcn_mqsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mqsad_i64(i64 %x, i32 %y, i64 %z) #0 { + %val = call i64 @llvm.hsail.gcn.mqsad(i64 %x, i32 %y, i64 %z) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mqsad_i64_rii( +; HSAIL: gcn_mqsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mqsad_i64_rii(i64 %x) #0 { + %val = call i64 @llvm.hsail.gcn.mqsad(i64 %x, i32 16777216, i64 2) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mqsad_i64_rir( +; HSAIL: gcn_mqsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mqsad_i64_rir(i64 %x, i64 %y) #0 { + %val = call i64 @llvm.hsail.gcn.mqsad(i64 %x, i32 1, i64 %y) #0 + ret 
i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mqsad_i64_rri( +; HSAIL: gcn_mqsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mqsad_i64_rri(i64 %x, i32 %y) #0 { + %val = call i64 @llvm.hsail.gcn.mqsad(i64 %x, i32 %y, i64 7) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mqsad_i64_iri( +; HSAIL: gcn_mqsad_b64 {{\$d[0-9]+}}, 342421, {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mqsad_i64_iri(i32 %x) #0 { + %val = call i64 @llvm.hsail.gcn.mqsad(i64 342421, i32 %x, i64 9) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mqsad_i64_iir( +; HSAIL: gcn_mqsad_b64 {{\$d[0-9]+}}, 256, {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mqsad_i64_iir(i64 %x) #0 { + %val = call i64 @llvm.hsail.gcn.mqsad(i64 256, i32 65536, i64 %x) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mqsad_i64_iii( +; HSAIL: gcn_mqsad_b64 {{\$d[0-9]+}}, 3, {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mqsad_i64_iii() #0 { + %val = call i64 @llvm.hsail.gcn.mqsad(i64 3, i32 1234, i64 11) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_mqsad_i64( +; HSAIL: gcn_mqsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_legacy_mqsad_i64(i64 %x, i32 %y, i64 %z) #0 { + %val = call i64 @llvm.HSAIL.mqsad(i64 %x, i32 %y, i64 %z) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.gcn.msad.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.gcn.msad.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.gcn.msad(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.msad(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_msad_i32( +; HSAIL: gcn_msad_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_msad_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.gcn.msad(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_msad_i32_rii( +; HSAIL: gcn_msad_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 16777216, 2; +define i32 @test_msad_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.msad(i32 %x, i32 16777216, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_msad_i32_rir( +; HSAIL: gcn_msad_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_msad_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.gcn.msad(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_msad_i32_rri( +; HSAIL: gcn_msad_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_msad_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.gcn.msad(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_msad_i32_iri( +; HSAIL: gcn_msad_b32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_msad_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.msad(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_msad_i32_iir( +; HSAIL: gcn_msad_b32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_msad_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.msad(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_msad_i32_iii( +; HSAIL: gcn_msad_b32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_msad_i32_iii() #0 { + %val = call i32 
@llvm.hsail.gcn.msad(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_msad_i32( +; HSAIL: gcn_msad_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_msad_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.msad(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.gcn.qsad.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.gcn.qsad.ll @@ -0,0 +1,61 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i64 @llvm.hsail.gcn.qsad(i64, i64, i64) #0 +declare i64 @llvm.HSAIL.qsad(i64, i64, i64) #0 +; HSAIL-LABEL: {{^}}prog function &test_qsad_i64( +; HSAIL: gcn_qsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_qsad_i64(i64 %x, i64 %y, i64 %z) #0 { + %val = call i64 @llvm.hsail.gcn.qsad(i64 %x, i64 %y, i64 %z) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_qsad_i64_rii( +; HSAIL: gcn_qsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 16777216, 2; +define i64 @test_qsad_i64_rii(i64 %x) #0 { + %val = call i64 @llvm.hsail.gcn.qsad(i64 %x, i64 16777216, i64 2) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_qsad_i64_rir( +; HSAIL: gcn_qsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 1, {{\$d[0-9]+}}; +define i64 @test_qsad_i64_rir(i64 %x, i64 %y) #0 { + %val = call i64 @llvm.hsail.gcn.qsad(i64 %x, i64 1, i64 %y) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_qsad_i64_rri( +; HSAIL: gcn_qsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, 7; +define i64 @test_qsad_i64_rri(i64 %x, i64 %y) #0 { + %val = call i64 @llvm.hsail.gcn.qsad(i64 %x, i64 %y, i64 7) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_qsad_i64_iri( +; HSAIL: gcn_qsad_b64 {{\$d[0-9]+}}, 342421, {{\$d[0-9]+}}, 9; +define i64 @test_qsad_i64_iri(i64 %x) #0 { + %val = call i64 @llvm.hsail.gcn.qsad(i64 342421, i64 %x, i64 9) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_qsad_i64_iir( +; HSAIL: gcn_qsad_b64 {{\$d[0-9]+}}, 256, 65536, {{\$d[0-9]+}}; +define i64 @test_qsad_i64_iir(i64 %x) #0 { + %val = call i64 @llvm.hsail.gcn.qsad(i64 256, i64 65536, i64 %x) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_qsad_i64_iii( +; HSAIL: gcn_qsad_b64 {{\$d[0-9]+}}, 3, 1234, 11; +define i64 @test_qsad_i64_iii() #0 { + %val = call i64 @llvm.hsail.gcn.qsad(i64 3, i64 1234, i64 11) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_qsad_i64( +; HSAIL: gcn_qsad_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_legacy_qsad_i64(i64 %x, i64 %y, i64 %z) #0 { + %val = call i64 @llvm.HSAIL.qsad(i64 %x, i64 %y, i64 %z) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.gcn.sadd.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.gcn.sadd.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.sadd(i32, i32, i32) #0 +declare i32 @llvm.hsail.gcn.sadd(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_sadd_i32( +; HSAIL: gcn_sadd_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sadd_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.gcn.sadd(i32 %x, i32 %y, i32 
%z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadd_i32_rii( +; HSAIL: gcn_sadd_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 16777216, 2; +define i32 @test_sadd_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.sadd(i32 %x, i32 16777216, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadd_i32_rir( +; HSAIL: gcn_sadd_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_sadd_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.gcn.sadd(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadd_i32_rri( +; HSAIL: gcn_sadd_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_sadd_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.gcn.sadd(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadd_i32_iri( +; HSAIL: gcn_sadd_b32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_sadd_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.sadd(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadd_i32_iir( +; HSAIL: gcn_sadd_b32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_sadd_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.sadd(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadd_i32_iii( +; HSAIL: gcn_sadd_b32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_sadd_i32_iii() #0 { + %val = call i32 @llvm.hsail.gcn.sadd(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_sadd_i32( +; HSAIL: gcn_sadd_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_sadd_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.sadd(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.gcn.sadw.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.gcn.sadw.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.gcn.sadw(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.sadw(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_sadw_i32( +; HSAIL: gcn_sadw_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sadw_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.gcn.sadw(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadw_i32_rii( +; HSAIL: gcn_sadw_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 16777216, 2; +define i32 @test_sadw_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.sadw(i32 %x, i32 16777216, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadw_i32_rir( +; HSAIL: gcn_sadw_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_sadw_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.gcn.sadw(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadw_i32_rri( +; HSAIL: gcn_sadw_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_sadw_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.gcn.sadw(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadw_i32_iri( +; HSAIL: gcn_sadw_b32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_sadw_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.sadw(i32 342421, i32 %x, i32 9) 
#0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadw_i32_iir( +; HSAIL: gcn_sadw_b32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_sadw_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.gcn.sadw(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadw_i32_iii( +; HSAIL: gcn_sadw_b32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_sadw_i32_iii() #0 { + %val = call i32 @llvm.hsail.gcn.sadw(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_sadw_i32( +; HSAIL: gcn_sadw_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_sadw_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.sadw(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.gridgroups.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.gridgroups.ll @@ -0,0 +1,56 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.gridgroups(i32) #0 +declare i32 @llvm.HSAIL.get.num.groups(i32) #0 + + +; FUNC-LABEL: {{^}}prog function &test_gridgroups_0 +; HSAIL: gridgroups_u32 {{\$s[0-9]+}}, 0; +define void @test_gridgroups_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.gridgroups(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_gridgroups_1 +; HSAIL: gridgroups_u32 {{\$s[0-9]+}}, 1; +define void @test_gridgroups_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.gridgroups(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_gridgroups_2 +; HSAIL: gridgroups_u32 {{\$s[0-9]+}}, 2; +define void @test_gridgroups_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.gridgroups(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_gridgroups_0 +; HSAIL: gridgroups_u32 {{\$s[0-9]+}}, 0; +define void @test_legacy_gridgroups_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.num.groups(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_gridgroups_1 +; HSAIL: gridgroups_u32 {{\$s[0-9]+}}, 1; +define void @test_legacy_gridgroups_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.num.groups(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_gridgroups_2 +; HSAIL: gridgroups_u32 {{\$s[0-9]+}}, 2; +define void @test_legacy_gridgroups_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.num.groups(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.gridsize.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.gridsize.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.gridsize.i32(i32) #0 +declare i64 @llvm.hsail.gridsize.i64(i32) #0 + +declare i32 @llvm.HSAIL.get.global.size(i32) #0 + + +; FUNC-LABEL: {{^}}prog function &test_gridsize_i32_0 +; HSAIL: gridsize_u32 {{\$s[0-9]+}}, 0; +define void @test_gridsize_i32_0(i32 
addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.gridsize.i32(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_gridsize_i32_1 +; HSAIL: gridsize_u32 {{\$s[0-9]+}}, 1; +define void @test_gridsize_i32_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.gridsize.i32(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_gridsize_i32_2 +; HSAIL: gridsize_u32 {{\$s[0-9]+}}, 2; +define void @test_gridsize_i32_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.gridsize.i32(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_gridsize_i64_0 +; HSAIL: gridsize_u64 {{\$d[0-9]+}}, 0; +define void @test_gridsize_i64_0(i64 addrspace(1)* %out) #1 { + %tmp0 = call i64 @llvm.hsail.gridsize.i64(i32 0) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_gridsize_i64_1 +; HSAIL: gridsize_u64 {{\$d[0-9]+}}, 1; +define void @test_gridsize_i64_1(i64 addrspace(1)* %out) #1 { + %tmp0 = call i64 @llvm.hsail.gridsize.i64(i32 1) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_gridsize_i64_2 +; HSAIL: gridsize_u64 {{\$d[0-9]+}}, 2; +define void @test_gridsize_i64_2(i64 addrspace(1)* %out) #1 { + %tmp0 = call i64 @llvm.hsail.gridsize.i64(i32 2) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_gridsize_0 +; HSAIL: gridsize_u32 {{\$s[0-9]+}}, 0; +define void @test_legacy_gridsize_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.global.size(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_gridsize_1 +; HSAIL: gridsize_u32 {{\$s[0-9]+}}, 1; +define void @test_legacy_gridsize_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.global.size(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_gridsize_2 +; HSAIL: gridsize_u32 {{\$s[0-9]+}}, 2; +define void @test_legacy_gridsize_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.global.size(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + + Index: test/CodeGen/HSAIL/llvm.hsail.groupbaseptr.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.groupbaseptr.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 < %s | FileCheck -check-prefix=HSAIL %s + +declare i8 addrspace(3)* @llvm.hsail.groupbaseptr() #0 + +; HSAIL-LABEL: {{^}}prog function &test_groupbaseptr( +; HSAIL: groupbaseptr_u32 [[PTR:\$s[0-9]+]]; +define i8 addrspace(3)* @test_groupbaseptr() #1 { + %tmp0 = call i8 addrspace(3)* @llvm.hsail.groupbaseptr() #0 + ret i8 addrspace(3)* %tmp0 +} + +; HSAIL-LABEL: {{^}}prog function &test_groupbaseptr_load( +; HSAIL: groupbaseptr_u32 [[PTR:\$s[0-9]+]]; +; HSAIL: ld_group_align(4)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]{{\]}}; +define i32 @test_groupbaseptr_load() #1 { + %tmp0 = call i8 addrspace(3)* @llvm.hsail.groupbaseptr() #0 + %bc = bitcast i8 addrspace(3)* %tmp0 to i32 addrspace(3)* + %load = load i32, i32 addrspace(3)* %bc + ret i32 %load +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: 
test/CodeGen/HSAIL/llvm.hsail.imagefence.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/llvm.hsail.imagefence.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s
+
+; HSAIL-LABEL: {{^}}prog function &test_imagefence(
+; HSAIL: imagefence;
+define void @test_imagefence() #0 {
+ call void @llvm.hsail.imagefence() #0
+ ret void
+}
+declare void @llvm.hsail.imagefence() #0
+
+attributes #0 = { nounwind }
Index: test/CodeGen/HSAIL/llvm.hsail.kernargbaseptr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/llvm.hsail.kernargbaseptr.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL32 -check-prefix=HSAIL %s
+; RUN: llc -march=hsail64 < %s | FileCheck -check-prefix=HSAIL64 -check-prefix=HSAIL %s
+
+declare i8 addrspace(7)* @llvm.hsail.kernargbaseptr() #0
+
+; HSAIL-LABEL: {{^}}prog function &test_kernargbaseptr(
+; HSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]];
+
+; HSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]];
+define i8 addrspace(7)* @test_kernargbaseptr() #1 {
+ %tmp0 = call i8 addrspace(7)* @llvm.hsail.kernargbaseptr() #0
+ ret i8 addrspace(7)* %tmp0
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_kernargbaseptr_load(
+; HSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]];
+; HSAIL32: ld_kernarg_align(4)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]{{\]}};
+
+; HSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]];
+; HSAIL64: ld_kernarg_align(4)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]{{\]}};
+define i32 @test_kernargbaseptr_load() #1 {
+ %tmp0 = call i8 addrspace(7)* @llvm.hsail.kernargbaseptr() #0
+ %bc = bitcast i8 addrspace(7)* %tmp0 to i32 addrspace(7)*
+ %load = load i32, i32 addrspace(7)* %bc
+ ret i32 %load
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Index: test/CodeGen/HSAIL/llvm.hsail.laneid.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/llvm.hsail.laneid.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s
+
+declare i32 @llvm.hsail.laneid() #0
+declare i32 @llvm.HSAIL.get.lane.id() #0
+
+; FUNC-LABEL: {{^}}prog function &test_laneid(
+; HSAIL: laneid_u32 {{\$s[0-9]+}};
+define void @test_laneid(i32 addrspace(1)* %out) #1 {
+ %tmp0 = call i32 @llvm.hsail.laneid() #0
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &test_legacy_get_laneid(
+; HSAIL: laneid_u32 {{\$s[0-9]+}};
+define void @test_legacy_get_laneid(i32 addrspace(1)* %out) #1 {
+ %tmp0 = call i32 @llvm.HSAIL.get.lane.id() #0
+ store i32 %tmp0, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Index: test/CodeGen/HSAIL/llvm.hsail.lastbit.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/llvm.hsail.lastbit.ll
@@ -0,0 +1,30 @@
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s
+
+; FIXME: Should also support i64
+declare i32 @llvm.hsail.lastbit.i32(i32) #0
+declare i32 @llvm.hsail.lastbit.i64(i64) #0
+
+declare i32 @llvm.HSAIL.lastbit.u32(i32) #0
+
+; HSAIL-LABEL: {{^}}prog function &test_lastbit_i32(
+; HSAIL: lastbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define i32 @test_lastbit_i32(i32 %x) #0 {
+ %ret = call i32 @llvm.hsail.lastbit.i32(i32 %x) #0
+ ret i32 %ret
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_lastbit_i64(
+;
HSAIL: lastbit_u32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_lastbit_i64(i64 %x) #0 { + %ret = call i32 @llvm.hsail.lastbit.i64(i64 %x) #0 + ret i32 %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_lastbit_i32( +; HSAIL: lastbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_lastbit_i32(i32 %x) #0 { + %ret = call i32 @llvm.HSAIL.lastbit.u32(i32 %x) #0 + ret i32 %ret +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.lerp.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.lerp.ll @@ -0,0 +1,62 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.lerp(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.lerp.u8x4(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_lerp_i32( +; HSAIL: lerp_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_lerp_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.lerp(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_lerp_i32_rii( +; HSAIL: lerp_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u8x4(1,0,0,0), u8x4(0,0,0,2); +define i32 @test_lerp_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.lerp(i32 %x, i32 16777216, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_lerp_i32_rir( +; HSAIL: lerp_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u8x4(0,0,0,1), {{\$s[0-9]+}}; +define i32 @test_lerp_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.lerp(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_lerp_i32_rri( +; HSAIL: lerp_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, u8x4(0,0,0,7); +define i32 @test_lerp_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.lerp(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_lerp_i32_iri( +; HSAIL: lerp_u8x4 {{\$s[0-9]+}}, u8x4(0,5,57,149), {{\$s[0-9]+}}, u8x4(65,242,128,0); +define i32 @test_lerp_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.lerp(i32 342421, i32 %x, i32 1106411520) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_lerp_i32_iir( +; HSAIL: lerp_u8x4 {{\$s[0-9]+}}, u8x4(0,0,1,0), u8x4(0,1,0,0), {{\$s[0-9]+}}; +define i32 @test_lerp_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.lerp(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_lerp_i32_iii( +; HSAIL: lerp_u8x4 {{\$s[0-9]+}}, u8x4(0,0,0,3), u8x4(0,0,4,210), u8x4(0,0,0,11); +define i32 @test_lerp_i32_iii() #0 { + %val = call i32 @llvm.hsail.lerp(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_lerp_i32( +; HSAIL: lerp_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_lerp_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.lerp.u8x4(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.maxcuid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.maxcuid.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.maxcuid() #0 + +; FUNC-LABEL: {{^}}prog function &test_maxcuid( +; HSAIL: maxcuid_u32 {{\$s[0-9]+}}; +define void @test_maxcuid(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.maxcuid() #0 
+ store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.maxwaveid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.maxwaveid.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.maxwaveid() #0 +declare i32 @llvm.HSAIL.get.maxdynwave.id() #0 + +; FUNC-LABEL: {{^}}prog function &test_maxwaveid( +; HSAIL: maxwaveid_u32 {{\$s[0-9]+}}; +define void @test_maxwaveid(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.maxwaveid() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_maxdynwave_id +; HSAIL: maxwaveid_u32 {{\$s[0-9]+}}; +define void @test_legacy_get_maxdynwave_id(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.maxdynwave.id() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.memfence.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.memfence.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare void @llvm.hsail.memfence(i32, i32) #0 +declare void @llvm.HSAIL.memfence(i32, i32) #0 + + + +; HSAIL-LABEL: {{^}}prog function &test_memfence_0( +; HSAIL: memfence_scacq_wave; +define void @test_memfence_0(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 2, i32 2) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_1( +; HSAIL: memfence_scacq_wg; +define void @test_memfence_1(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 2, i32 3) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_2( +; HSAIL: memfence_scacq_agent; +define void @test_memfence_2(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 2, i32 4) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_3( +; HSAIL: memfence_scacq_system; +define void @test_memfence_3(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 2, i32 5) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_4( +; HSAIL: memfence_screl_wave; +define void @test_memfence_4(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 3, i32 2) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_5( +; HSAIL: memfence_screl_wg; +define void @test_memfence_5(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 3, i32 3) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_6( +; HSAIL: memfence_screl_agent; +define void @test_memfence_6(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 3, i32 4) #0 + ret void +} +; HSAIL-LABEL: {{^}}prog function &test_memfence_7( +; HSAIL: memfence_screl_system; +define void @test_memfence_7(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 3, i32 5) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_8( +; HSAIL: memfence_scar_wave; +define void @test_memfence_8(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 4, i32 2) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_9( +; HSAIL: memfence_scar_wg; +define void @test_memfence_9(i32 addrspace(1)* %out) #0 { + call void 
@llvm.hsail.memfence(i32 4, i32 3) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_memfence_10( +; HSAIL: memfence_scar_agent; +define void @test_memfence_10(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 4, i32 4) #0 + ret void +} +; HSAIL-LABEL: {{^}}prog function &test_memfence_11( +; HSAIL: memfence_scar_system; +define void @test_memfence_11(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.memfence(i32 4, i32 5) #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_memfence_0( +; HSAIL: memfence_scacq_wave; +define void @test_legacy_memfence_0(i32 addrspace(1)* %out) #0 { + call void @llvm.HSAIL.memfence(i32 2, i32 2) #0 + ret void +} + + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.ncos.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.ncos.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; FIXME: Do we really need this over llvm.cos? + +declare float @llvm.hsail.ncos.f32(float) #0 +declare float @llvm.HSAIL.ncos.f32(float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_ncos_f32( +; HSAIL: ncos_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_ncos_f32(float %x) #0 { + %cos = call float @llvm.hsail.ncos.f32(float %x) #0 + ret float %cos +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_ncos_f32( +; HSAIL: ncos_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_legacy_ncos_f32(float %x) #0 { + %cos = call float @llvm.HSAIL.ncos.f32(float %x) #0 + ret float %cos +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.nexp2.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.nexp2.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; FIXME: Do we really need this over llvm.exp2? 
+
+declare float @llvm.hsail.nexp2.f32(float) #0
+declare float @llvm.HSAIL.nexp2.f32(float) #0
+
+; HSAIL-LABEL: {{^}}prog function &test_nexp2_f32(
+; HSAIL: nexp2_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_nexp2_f32(float %x) #0 {
+ %exp2 = call float @llvm.hsail.nexp2.f32(float %x) #0
+ ret float %exp2
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_legacy_nexp2_f32(
+; HSAIL: nexp2_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_legacy_nexp2_f32(float %x) #0 {
+ %exp2 = call float @llvm.HSAIL.nexp2.f32(float %x) #0
+ ret float %exp2
+}
+
+attributes #0 = { nounwind readnone }
Index: test/CodeGen/HSAIL/llvm.hsail.nfma.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/llvm.hsail.nfma.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s
+
+declare float @llvm.hsail.nfma.f32(float, float, float) #0
+declare double @llvm.hsail.nfma.f64(double, double, double) #0
+
+
+; Legacy names
+declare float @llvm.HSAIL.nfma.f32(float, float, float) #0
+declare double @llvm.HSAIL.nfma.f64(double, double, double) #0
+
+; HSAIL-LABEL: {{^}}prog function &test_nfma_f32(
+; HSAIL: nfma_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_nfma_f32(float %x, float %y, float %z) #0 {
+ %val = call float @llvm.hsail.nfma.f32(float %x, float %y, float %z) #0
+ ret float %val
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_nfma_f64(
+; HSAIL: nfma_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}};
+define double @test_nfma_f64(double %x, double %y, double %z) #0 {
+ %val = call double @llvm.hsail.nfma.f64(double %x, double %y, double %z) #0
+ ret double %val
+}
+
+
+; HSAIL-LABEL: {{^}}prog function &test_legacy_nfma_f32(
+; HSAIL: nfma_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_legacy_nfma_f32(float %x, float %y, float %z) #0 {
+ %val = call float @llvm.HSAIL.nfma.f32(float %x, float %y, float %z) #0
+ ret float %val
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_legacy_nfma_f64(
+; HSAIL: nfma_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}};
+define double @test_legacy_nfma_f64(double %x, double %y, double %z) #0 {
+ %val = call double @llvm.HSAIL.nfma.f64(double %x, double %y, double %z) #0
+ ret double %val
+}
+
+attributes #0 = { nounwind readnone }
Index: test/CodeGen/HSAIL/llvm.hsail.nlog2.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/llvm.hsail.nlog2.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s
+
+; FIXME: Do we really need this over llvm.log2?
+declare float @llvm.hsail.nlog2.f32(float) #0
+declare float @llvm.HSAIL.nlog2.f32(float) #0
+
+; HSAIL-LABEL: {{^}}prog function &test_nlog2_f32(
+; HSAIL: nlog2_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_nlog2_f32(float %x) #0 {
+ %log2 = call float @llvm.hsail.nlog2.f32(float %x) #0
+ ret float %log2
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_legacy_nlog2_f32(
+; HSAIL: nlog2_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_legacy_nlog2_f32(float %x) #0 {
+ %log2 = call float @llvm.HSAIL.nlog2.f32(float %x) #0
+ ret float %log2
+}
+
+attributes #0 = { nounwind readnone }
Index: test/CodeGen/HSAIL/llvm.hsail.nrcp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/llvm.hsail.nrcp.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s
+
+declare float @llvm.hsail.nrcp.f32(float) #0
+declare double @llvm.hsail.nrcp.f64(double) #0
+declare float @llvm.HSAIL.nrcp.f32(float) #0
+declare double @llvm.HSAIL.nrcp.f64(double) #0
+
+; HSAIL-LABEL: {{^}}prog function &test_nrcp_f32(
+; HSAIL: nrcp_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_nrcp_f32(float %x) #0 {
+ %sqrt = call float @llvm.hsail.nrcp.f32(float %x) #0
+ ret float %sqrt
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_nrcp_f64(
+; HSAIL: nrcp_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}};
+define double @test_nrcp_f64(double %x) #0 {
+ %sqrt = call double @llvm.hsail.nrcp.f64(double %x) #0
+ ret double %sqrt
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_legacy_nrcp_f32(
+; HSAIL: nrcp_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_legacy_nrcp_f32(float %x) #0 {
+ %sqrt = call float @llvm.HSAIL.nrcp.f32(float %x) #0
+ ret float %sqrt
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_legacy_nrcp_f64(
+; HSAIL: nrcp_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}};
+define double @test_legacy_nrcp_f64(double %x) #0 {
+ %sqrt = call double @llvm.HSAIL.nrcp.f64(double %x) #0
+ ret double %sqrt
+}
+
+attributes #0 = { nounwind readnone }
Index: test/CodeGen/HSAIL/llvm.hsail.nrsqrt.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/llvm.hsail.nrsqrt.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s
+
+declare float @llvm.hsail.nrsqrt.f32(float) #0
+declare double @llvm.hsail.nrsqrt.f64(double) #0
+
+declare float @llvm.HSAIL.nrsqrt.f32(float) #0
+declare double @llvm.HSAIL.nrsqrt.f64(double) #0
+
+; HSAIL-LABEL: {{^}}prog function &test_nrsqrt_f32(
+; HSAIL: nrsqrt_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_nrsqrt_f32(float %x) #0 {
+ %sqrt = call float @llvm.hsail.nrsqrt.f32(float %x) #0
+ ret float %sqrt
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_nrsqrt_f64(
+; HSAIL: nrsqrt_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}};
+define double @test_nrsqrt_f64(double %x) #0 {
+ %sqrt = call double @llvm.hsail.nrsqrt.f64(double %x) #0
+ ret double %sqrt
+}
+
+
+; HSAIL-LABEL: {{^}}prog function &test_legacy_nrsqrt_f32(
+; HSAIL: nrsqrt_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define float @test_legacy_nrsqrt_f32(float %x) #0 {
+ %sqrt = call float @llvm.HSAIL.nrsqrt.f32(float %x) #0
+ ret float %sqrt
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_legacy_nrsqrt_f64(
+; HSAIL: nrsqrt_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}};
+define double @test_legacy_nrsqrt_f64(double %x) #0 {
+ %sqrt = call double @llvm.HSAIL.nrsqrt.f64(double %x) #0
+ ret double %sqrt
+}
+
+attributes #0 = { nounwind readnone }
Index: test/CodeGen/HSAIL/llvm.hsail.nsin.ll
=================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.nsin.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; FIXME: Do we really need this over llvm.sin? +declare float @llvm.hsail.nsin.f32(float) #0 +declare float @llvm.HSAIL.nsin.f32(float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_nsin_f32( +; HSAIL: nsin_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_nsin_f32(float %x) #0 { + %sin = call float @llvm.hsail.nsin.f32(float %x) #0 + ret float %sin +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_nsin_f32( +; HSAIL: nsin_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_legacy_nsin_f32(float %x) #0 { + %sin = call float @llvm.HSAIL.nsin.f32(float %x) #0 + ret float %sin +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.nsqrt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.nsqrt.ll @@ -0,0 +1,50 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-NOT: __hsail_nsqrt_f32 + +; FIXME: Do we really need this over llvm.sqrt? What is nsqrt's behavior for < 0.0? + +declare float @llvm.hsail.nsqrt.f32(float) #0 +declare double @llvm.hsail.nsqrt.f64(double) #0 + +declare float @llvm.HSAIL.nsqrt.f32(float) #0 +declare double @llvm.HSAIL.nsqrt.f64(double) #0 +declare float @__hsail_nsqrt_f32(float) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_nsqrt_f32( +; HSAIL: nsqrt_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_nsqrt_f32(float %x) #0 { + %sqrt = call float @llvm.hsail.nsqrt.f32(float %x) #0 + ret float %sqrt +} + +; HSAIL-LABEL: {{^}}prog function &test_nsqrt_f64( +; HSAIL: nsqrt_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_nsqrt_f64(double %x) #0 { + %sqrt = call double @llvm.hsail.nsqrt.f64(double %x) #0 + ret double %sqrt +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_nsqrt_f32( +; HSAIL: nsqrt_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_legacy_nsqrt_f32(float %x) #0 { + %sqrt = call float @llvm.HSAIL.nsqrt.f32(float %x) #0 + ret float %sqrt +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_nsqrt_f64( +; HSAIL: nsqrt_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define double @test_legacy_nsqrt_f64(double %x) #0 { + %sqrt = call double @llvm.HSAIL.nsqrt.f64(double %x) #0 + ret double %sqrt +} + +; HSAIL-LABEL: {{^}}prog function &test_nsqrt_gccbuiltin_f32( +; HSAIL: nsqrt_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_nsqrt_gccbuiltin_f32(float %x) #0 { + %sqrt = call float @__hsail_nsqrt_f32(float %x) #0 + ret float %sqrt +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.packcvt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.packcvt.ll @@ -0,0 +1,105 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.packcvt(float, float, float, float) #0 +declare i32 @llvm.HSAIL.packcvt.u8x4.f32(float, float, float, float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_rrrr( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_packcvt_u8x4_f32_rrrr(float %x, float %y, float %z, float %w) #0 { + %val = call i32 @llvm.hsail.packcvt(float %x, float %y, float %z, float %w) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_rrri( +; HSAIL: packcvt_u8x4_f32 
{{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000; +define i32 @test_packcvt_u8x4_f32_rrri(float %x, float %y, float %z) #0 { + %val = call i32 @llvm.hsail.packcvt(float %x, float %y, float %z, float 1.0) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_rrir( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, {{\$s[0-9]+}}; +define i32 @test_packcvt_u8x4_f32_rrir(float %x, float %y, float %z) #0 { + %val = call i32 @llvm.hsail.packcvt(float %x, float %y, float 1.0, float %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_rirr( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_packcvt_u8x4_f32_rirr(float %x, float %y, float %z) #0 { + %val = call i32 @llvm.hsail.packcvt(float %x, float 1.0, float %y, float %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_irrr( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, 0F3f800000, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_packcvt_u8x4_f32_irrr(float %x, float %y, float %z) #0 { + %val = call i32 @llvm.hsail.packcvt(float 1.0, float %x, float %y, float %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_iirr( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, 0F3f800000, 0F40000000, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_packcvt_u8x4_f32_iirr(float %x, float %y) #0 { + %val = call i32 @llvm.hsail.packcvt(float 1.0, float 2.0, float %x, float %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_riir( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, 0F40000000, {{\$s[0-9]+}}; +define i32 @test_packcvt_u8x4_f32_riir(float %x, float %y) #0 { + %val = call i32 @llvm.hsail.packcvt(float %x, float 1.0, float 2.0, float %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_rrii( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, 0F40000000; +define i32 @test_packcvt_u8x4_f32_rrii(float %x, float %y) #0 { + %val = call i32 @llvm.hsail.packcvt(float %x, float %y, float 1.0, float 2.0) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_riri( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, {{\$s[0-9]+}}, 0F40000000; +define i32 @test_packcvt_u8x4_f32_riri(float %x, float %y) #0 { + %val = call i32 @llvm.hsail.packcvt(float %x, float 1.0, float %y, float 2.0) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_irir( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, 0F3f800000, {{\$s[0-9]+}}, 0F40000000, {{\$s[0-9]+}}; +define i32 @test_packcvt_u8x4_f32_irir(float %x, float %y) #0 { + %val = call i32 @llvm.hsail.packcvt(float 1.0, float %x, float 2.0, float %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_iiir( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, 0F3f800000, 0F40000000, 0F40400000, {{\$s[0-9]+}}; +define i32 @test_packcvt_u8x4_f32_iiir(float %x) #0 { + %val = call i32 @llvm.hsail.packcvt(float 1.0, float 2.0, float 3.0, float %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_riii( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, 0F40000000, 0F40400000; +define i32 @test_packcvt_u8x4_f32_riii(float %x) #0 { + %val = call i32 @llvm.hsail.packcvt(float %x, float 1.0, float 2.0, float 3.0) #0 + ret i32 %val +} + +; FIXME: 
This should constant fold. +; HSAIL-LABEL: {{^}}prog function &test_packcvt_u8x4_f32_iiii( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, 0F3f800000, 0F40000000, 0F40400000, 0F40800000; +define i32 @test_packcvt_u8x4_f32_iiii(float %x) #0 { + %val = call i32 @llvm.hsail.packcvt(float 1.0, float 2.0, float 3.0, float 4.0) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_packcvt_u8x4_f32_rrrr( +; HSAIL: packcvt_u8x4_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_packcvt_u8x4_f32_rrrr(float %x, float %y, float %z, float %w) #0 { + %val = call i32 @llvm.HSAIL.packcvt.u8x4.f32(float %x, float %y, float %z, float %w) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.packetid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.packetid.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i64 @llvm.hsail.packetid() #0 + +; FUNC-LABEL: {{^}}prog function &test_packetid( +; HSAIL: packetid_u64 {{\$d[0-9]+}}; +define void @test_packetid(i64 addrspace(1)* %out) #1 { + %tmp0 = call i64 @llvm.hsail.packetid() #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.sad.u32.u16x2.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.sad.u32.u16x2.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.sad.u32.u16x2(i32, i32, i32) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32( +; HSAIL: sad_u32_u16x2 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sad_u32_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.sad.u32.u16x2(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rii( +; HSAIL: sad_u32_u16x2 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u16x2(256,0), 2; +define i32 @test_sad_u32_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u16x2(i32 %x, i32 16777216, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rir( +; HSAIL: sad_u32_u16x2 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u16x2(0,1), {{\$s[0-9]+}}; +define i32 @test_sad_u32_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sad.u32.u16x2(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rri( +; HSAIL: sad_u32_u16x2 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_sad_u32_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sad.u32.u16x2(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iri( +; HSAIL: sad_u32_u16x2 {{\$s[0-9]+}}, u16x2(5,14741), {{\$s[0-9]+}}, 9; +define i32 @test_sad_u32_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u16x2(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iir( +; HSAIL: sad_u32_u16x2 {{\$s[0-9]+}}, u16x2(0,256), u16x2(1,0), {{\$s[0-9]+}}; +define i32 @test_sad_u32_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u16x2(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iii( +; HSAIL: sad_u32_u16x2 {{\$s[0-9]+}}, u16x2(0,3), 
u16x2(0,1234), 11; +define i32 @test_sad_u32_i32_iii() #0 { + %val = call i32 @llvm.hsail.sad.u32.u16x2(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.sad.u32.u32.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.sad.u32.u32.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.sad.u32.u32(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32( +; HSAIL: sad_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sad_u32_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.sad.u32.u32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rii( +; HSAIL: sad_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 16777216, 2; +define i32 @test_sad_u32_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u32(i32 %x, i32 16777216, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rir( +; HSAIL: sad_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_sad_u32_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sad.u32.u32(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rri( +; HSAIL: sad_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_sad_u32_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sad.u32.u32(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iri( +; HSAIL: sad_u32_u32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_sad_u32_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u32(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iir( +; HSAIL: sad_u32_u32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_sad_u32_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u32(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iii( +; HSAIL: sad_u32_u32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_sad_u32_i32_iii() #0 { + %val = call i32 @llvm.hsail.sad.u32.u32(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.sad.u32.u8x4.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.sad.u32.u8x4.ll @@ -0,0 +1,63 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.sad.u32.u8x4(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.sad.u32.u8x4(i32, i32, i32) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32( +; HSAIL: sad_u32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sad_u32_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.sad.u32.u8x4(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rii( +; HSAIL: sad_u32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u8x4(1,0,0,0), 2; +define i32 @test_sad_u32_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u8x4(i32 %x, i32 16777216, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rir( +; HSAIL: sad_u32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u8x4(0,0,0,1), {{\$s[0-9]+}}; +define i32 
@test_sad_u32_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sad.u32.u8x4(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_rri( +; HSAIL: sad_u32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_sad_u32_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sad.u32.u8x4(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iri( +; HSAIL: sad_u32_u8x4 {{\$s[0-9]+}}, u8x4(0,5,57,149), {{\$s[0-9]+}}, 9; +define i32 @test_sad_u32_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u8x4(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iir( +; HSAIL: sad_u32_u8x4 {{\$s[0-9]+}}, u8x4(0,0,1,0), u8x4(0,1,0,0), {{\$s[0-9]+}}; +define i32 @test_sad_u32_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.sad.u32.u8x4(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sad_u32_i32_iii( +; HSAIL: sad_u32_u8x4 {{\$s[0-9]+}}, u8x4(0,0,0,3), u8x4(0,0,4,210), 11; +define i32 @test_sad_u32_i32_iii() #0 { + %val = call i32 @llvm.hsail.sad.u32.u8x4(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_sad_u32_i32( +; HSAIL: sad_u32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_sad_u32_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.sad.u32.u8x4(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.sadhi.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.sadhi.ll @@ -0,0 +1,69 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.sadhi(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.sadhi.u16x2.u8x4(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_sadhi_u16x2_i32( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sadhi_u16x2_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.sadhi(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadhi_u16x2_i32_rii( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u8x4(1,0,0,0), u16x2(0,2); +define i32 @test_sadhi_u16x2_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.sadhi(i32 %x, i32 16777216, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadhi_u16x2_i32_rir( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u8x4(1,1,1,1), {{\$s[0-9]+}}; +define i32 @test_sadhi_u16x2_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sadhi(i32 %x, i32 16843009, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadhi_u16x2_i32_rri( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, u16x2(2,123); +define i32 @test_sadhi_u16x2_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sadhi(i32 %x, i32 %y, i32 131195) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadhi_u16x2_i32_rii_max( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, u8x4(255,255,255,255), u16x2(65535,65535); +define i32 @test_sadhi_u16x2_i32_rii_max(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sadhi(i32 %x, i32 4294967295, i32 4294967295) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadhi_u16x2_i32_iri( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, 
u8x4(0,5,57,149), {{\$s[0-9]+}}, u16x2(0,9); +define i32 @test_sadhi_u16x2_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.sadhi(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadhi_u16x2_i32_iir( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, u8x4(0,0,1,0), u8x4(0,1,0,0), {{\$s[0-9]+}}; +define i32 @test_sadhi_u16x2_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.sadhi(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sadhi_u16x2_i32_iii( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, u8x4(0,0,0,3), u8x4(0,0,4,210), u16x2(0,11); +define i32 @test_sadhi_u16x2_i32_iii() #0 { + %val = call i32 @llvm.hsail.sadhi(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_sadhi_u16x2_i32( +; HSAIL: sadhi_u16x2_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_sadhi_u16x2_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.sadhi.u16x2.u8x4(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.sbitextract.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.sbitextract.ll @@ -0,0 +1,71 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.sbitextract.i32(i32, i32, i32) #0 +declare i64 @llvm.hsail.sbitextract.i64(i64, i32, i32) #0 + +declare i32 @llvm.HSAIL.ibfe(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_sbitextract_i32( +; HSAIL: bitextract_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sbitextract_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.sbitextract.i32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitextract_i32_rii( +; HSAIL: bitextract_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, 2; +define i32 @test_sbitextract_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.sbitextract.i32(i32 %x, i32 1, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitextract_i32_rir( +; HSAIL: bitextract_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_sbitextract_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sbitextract.i32(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitextract_i32_rri( +; HSAIL: bitextract_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_sbitextract_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.sbitextract.i32(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitextract_i32_iri( +; HSAIL: bitextract_s32 {{\$s[0-9]+}}, 3, {{\$s[0-9]+}}, 7; +define i32 @test_sbitextract_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.sbitextract.i32(i32 3, i32 %x, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitextract_i32_iir( +; HSAIL: bitextract_s32 {{\$s[0-9]+}}, 3, 9, {{\$s[0-9]+}}; +define i32 @test_sbitextract_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.sbitextract.i32(i32 3, i32 9, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitextract_i32_iii( +; HSAIL: bitextract_s32 {{\$s[0-9]+}}, 3, 9, 11; +define i32 @test_sbitextract_i32_iii() #0 { + %val = call i32 @llvm.hsail.sbitextract.i32(i32 3, i32 9, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitextract_i64( +; HSAIL: 
bitextract_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i64 @test_sbitextract_i64(i64 %x, i32 %y, i32 %z) #0 { + %val = call i64 @llvm.hsail.sbitextract.i64(i64 %x, i32 %y, i32 %z) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_ibfe_i32( +; HSAIL: bitextract_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_ibfe_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.ibfe(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.sbitinsert.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.sbitinsert.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.sbitinsert.i32(i32, i32, i32, i32) #0 +declare i64 @llvm.hsail.sbitinsert.i64(i64, i64, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_sbitinsert_i32( +; HSAIL: bitinsert_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sbitinsert_i32(i32 %x, i32 %y, i32 %z, i32 %w) #0 { + %val = call i32 @llvm.hsail.sbitinsert.i32(i32 %x, i32 %y, i32 %z, i32 %w) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitinsert_i32_riii( +; HSAIL: bitinsert_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, 2, 3; +define i32 @test_sbitinsert_i32_riii(i32 %x) #0 { + %val = call i32 @llvm.hsail.sbitinsert.i32(i32 %x, i32 1, i32 2, i32 3) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitinsert_i32_rirr( +; HSAIL: bitinsert_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sbitinsert_i32_rirr(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.sbitinsert.i32(i32 %x, i32 1, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitinsert_i32_rrri( +; HSAIL: bitinsert_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_sbitinsert_i32_rrri(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.sbitinsert.i32(i32 %x, i32 %y, i32 %z, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitinsert_i32_iiii( +; HSAIL: bitinsert_s32 {{\$s[0-9]+}}, 3, 9, 11, 7; +define i32 @test_sbitinsert_i32_iiii() #0 { + %val = call i32 @llvm.hsail.sbitinsert.i32(i32 3, i32 9, i32 11, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_sbitinsert_i64( +; HSAIL: bitinsert_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i64 @test_sbitinsert_i64(i64 %x, i64 %y, i32 %z, i32 %w) #0 { + %val = call i64 @llvm.hsail.sbitinsert.i64(i64 %x, i64 %y, i32 %z, i32 %w) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.segmentp.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.segmentp.ll @@ -0,0 +1,135 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL32 -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL64 -check-prefix=HSAIL %s + +declare i1 @llvm.hsail.segmentp(i32, i1, i8 addrspace(4)*) #0 + +declare i1 @llvm.HSAIL.segmentp.global.p4i8(i8 addrspace(4)*) #0 +declare i1 @llvm.HSAIL.segmentp.local.p4i8(i8 addrspace(4)*) #0 +declare i1 @llvm.HSAIL.segmentp.private.p4i8(i8 addrspace(4)*) #0 + +; HSAIL-LABEL: {{^}}prog function 
&test_segmentp_global_nonull( +; HSAIL32: segmentp_global_nonull_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL64: segmentp_global_nonull_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_segmentp_global_nonull(i8 addrspace(4)* %ptr) #0 { + %val = call i1 @llvm.hsail.segmentp(i32 1, i1 true, i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_segmentp_global( +; HSAIL32: segmentp_global_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL64: segmentp_global_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_segmentp_global(i8 addrspace(4)* %ptr) #0 { + %val = call i1 @llvm.hsail.segmentp(i32 1, i1 false, i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_segmentp_local( +; HSAIL32: segmentp_group_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL64: segmentp_group_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_segmentp_local(i8 addrspace(4)* %ptr) #0 { + %val = call i1 @llvm.hsail.segmentp(i32 3, i1 false, i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_segmentp_private( +; HSAIL32: segmentp_private_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL64: segmentp_private_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_segmentp_private(i8 addrspace(4)* %ptr) #0 { + %val = call i1 @llvm.hsail.segmentp(i32 0, i1 false, i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_segmentp_global_imm( +; HSAIL32: segmentp_global_b1_u32 {{\$c[0-9]+}}, 12345; +; HSAIL64: segmentp_global_b1_u64 {{\$c[0-9]+}}, 12345; +define i32 @test_segmentp_global_imm() #0 { + %ptr = inttoptr i32 12345 to i8 addrspace(4)* + %val = call i1 @llvm.hsail.segmentp(i32 1, i1 false, i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_segmentp_local_imm( +; HSAIL32: segmentp_group_b1_u32 {{\$c[0-9]+}}, 12345; +; HSAIL64: segmentp_group_b1_u64 {{\$c[0-9]+}}, 12345; +define i32 @test_segmentp_local_imm() #0 { + %ptr = inttoptr i32 12345 to i8 addrspace(4)* + %val = call i1 @llvm.hsail.segmentp(i32 3, i1 false, i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_segmentp_private_imm( +; HSAIL32: segmentp_private_b1_u32 {{\$c[0-9]+}}, 12345; +; HSAIL64: segmentp_private_b1_u64 {{\$c[0-9]+}}, 12345; +define i32 @test_segmentp_private_imm() #0 { + %ptr = inttoptr i32 12345 to i8 addrspace(4)* + %val = call i1 @llvm.hsail.segmentp(i32 0, i1 false, i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + + + +; HSAIL-LABEL: {{^}}prog function &test_legacy_segmentp_global( +; HSAIL32: segmentp_global_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL64: segmentp_global_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_legacy_segmentp_global(i8 addrspace(4)* %ptr) #0 { + %val = call i1 @llvm.HSAIL.segmentp.global.p4i8(i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_segmentp_local( +; HSAIL32: segmentp_group_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL64: segmentp_group_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_legacy_segmentp_local(i8 addrspace(4)* %ptr) #0 { + %val = call i1 @llvm.HSAIL.segmentp.local.p4i8(i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_segmentp_private( +; 
HSAIL32: segmentp_private_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL64: segmentp_private_b1_u64 {{\$c[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_legacy_segmentp_private(i8 addrspace(4)* %ptr) #0 { + %val = call i1 @llvm.HSAIL.segmentp.private.p4i8(i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_segmentp_global_imm( +; HSAIL32: segmentp_global_b1_u32 {{\$c[0-9]+}}, 12345; +; HSAIL64: segmentp_global_b1_u64 {{\$c[0-9]+}}, 12345; +define i32 @test_legacy_segmentp_global_imm() #0 { + %ptr = inttoptr i32 12345 to i8 addrspace(4)* + %val = call i1 @llvm.HSAIL.segmentp.global.p4i8(i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_segmentp_local_imm( +; HSAIL32: segmentp_group_b1_u32 {{\$c[0-9]+}}, 12345; +; HSAIL64: segmentp_group_b1_u64 {{\$c[0-9]+}}, 12345; +define i32 @test_legacy_segmentp_local_imm() #0 { + %ptr = inttoptr i32 12345 to i8 addrspace(4)* + %val = call i1 @llvm.HSAIL.segmentp.local.p4i8(i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_segmentp_private_imm( +; HSAIL32: segmentp_private_b1_u32 {{\$c[0-9]+}}, 12345; +; HSAIL64: segmentp_private_b1_u64 {{\$c[0-9]+}}, 12345; +define i32 @test_legacy_segmentp_private_imm() #0 { + %ptr = inttoptr i32 12345 to i8 addrspace(4)* + %val = call i1 @llvm.HSAIL.segmentp.private.p4i8(i8 addrspace(4)* %ptr) #0 + %ext = sext i1 %val to i32 + ret i32 %ext +} + +attributes #0 = { nounwind readnone} Index: test/CodeGen/HSAIL/llvm.hsail.sfirstbit.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.sfirstbit.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.sfirstbit.i32(i32) #0 +declare i32 @llvm.hsail.sfirstbit.i64(i64) #0 + +; HSAIL-LABEL: {{^}}prog function &test_sfirstbit_i32( +; HSAIL: firstbit_u32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_sfirstbit_i32(i32 %x) #0 { + %ret = call i32 @llvm.hsail.sfirstbit.i32(i32 %x) #0 + ret i32 %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_sfirstbit_i64( +; HSAIL: firstbit_u32_s64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_sfirstbit_i64(i64 %x) #0 { + %ret = call i32 @llvm.hsail.sfirstbit.i64(i64 %x) #0 + ret i32 %ret +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.smad24.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.smad24.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.smad24(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.mad24.s32(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_smad24( +; HSAIL: mad24_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_smad24(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.smad24(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mad24_s32( +; HSAIL: mad24_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_mad24_s32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.mad24.s32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.smul24.ll =================================================================== --- /dev/null +++ 
test/CodeGen/HSAIL/llvm.hsail.smul24.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.smul24(i32, i32) #0 +declare i32 @llvm.HSAIL.mul24.s32(i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_mul24_s32( +; HSAIL: mul24_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_mul24_s32(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.smul24(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_mul24_s32( +; HSAIL: mul24_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_mul24_s32(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.mul24.s32(i32 %x, i32 %y) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.ubitextract.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.ubitextract.ll @@ -0,0 +1,71 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.ubitextract.i32(i32, i32, i32) #0 +declare i64 @llvm.hsail.ubitextract.i64(i64, i32, i32) #0 + +declare i32 @llvm.HSAIL.bfe(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_ubitextract_i32( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_ubitextract_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.ubitextract.i32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitextract_i32_rii( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, 2; +define i32 @test_ubitextract_i32_rii(i32 %x) #0 { + %val = call i32 @llvm.hsail.ubitextract.i32(i32 %x, i32 1, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitextract_i32_rir( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_ubitextract_i32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.ubitextract.i32(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitextract_i32_rri( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_ubitextract_i32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.ubitextract.i32(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitextract_i32_iri( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, 3, {{\$s[0-9]+}}, 7; +define i32 @test_ubitextract_i32_iri(i32 %x) #0 { + %val = call i32 @llvm.hsail.ubitextract.i32(i32 3, i32 %x, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitextract_i32_iir( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, 3, 9, {{\$s[0-9]+}}; +define i32 @test_ubitextract_i32_iir(i32 %x) #0 { + %val = call i32 @llvm.hsail.ubitextract.i32(i32 3, i32 9, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitextract_i32_iii( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, 3, 9, 11; +define i32 @test_ubitextract_i32_iii() #0 { + %val = call i32 @llvm.hsail.ubitextract.i32(i32 3, i32 9, i32 11) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitextract_i64( +; HSAIL: bitextract_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i64 @test_ubitextract_i64(i64 %x, i32 %y, i32 %z) #0 { + %val = call i64 @llvm.hsail.ubitextract.i64(i64 %x, i32 %y, i32 %z) #0 + ret i64 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_bfe_i32( +; HSAIL: bitextract_u32 {{\$s[0-9]+}}, 
{{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_bfe_i32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.bfe(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.ubitinsert.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.ubitinsert.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.ubitinsert.i32(i32, i32, i32, i32) #0 +declare i64 @llvm.hsail.ubitinsert.i64(i64, i64, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_ubitinsert_i32( +; HSAIL: bitinsert_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_ubitinsert_i32(i32 %x, i32 %y, i32 %z, i32 %w) #0 { + %val = call i32 @llvm.hsail.ubitinsert.i32(i32 %x, i32 %y, i32 %z, i32 %w) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitinsert_i32_riii( +; HSAIL: bitinsert_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, 2, 3; +define i32 @test_ubitinsert_i32_riii(i32 %x) #0 { + %val = call i32 @llvm.hsail.ubitinsert.i32(i32 %x, i32 1, i32 2, i32 3) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitinsert_i32_rirr( +; HSAIL: bitinsert_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_ubitinsert_i32_rirr(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.ubitinsert.i32(i32 %x, i32 1, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitinsert_i32_rrri( +; HSAIL: bitinsert_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_ubitinsert_i32_rrri(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.ubitinsert.i32(i32 %x, i32 %y, i32 %z, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitinsert_i32_iiii( +; HSAIL: bitinsert_u32 {{\$s[0-9]+}}, 3, 9, 11, 7; +define i32 @test_ubitinsert_i32_iiii() #0 { + %val = call i32 @llvm.hsail.ubitinsert.i32(i32 3, i32 9, i32 11, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_ubitinsert_i64( +; HSAIL: bitinsert_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i64 @test_ubitinsert_i64(i64 %x, i64 %y, i32 %z, i32 %w) #0 { + %val = call i64 @llvm.hsail.ubitinsert.i64(i64 %x, i64 %y, i32 %z, i32 %w) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.ufirstbit.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.ufirstbit.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.ufirstbit.i32(i32) #0 +declare i32 @llvm.hsail.ufirstbit.i64(i64) #0 +declare i32 @llvm.HSAIL.firstbit.u32(i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_ufirstbit_i32( +; HSAIL: firstbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_ufirstbit_i32(i32 %x) #0 { + %ret = call i32 @llvm.hsail.ufirstbit.i32(i32 %x) #0 + ret i32 %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_ufirstbit_i64( +; HSAIL: firstbit_u32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define i32 @test_ufirstbit_i64(i64 %x) #0 { + %ret = call i32 @llvm.hsail.ufirstbit.i64(i64 %x) #0 + ret i32 %ret +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_ufirstbit_i32( +; HSAIL: firstbit_u32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_ufirstbit_i32(i32 %x) #0 { + %ret = call i32 
@llvm.HSAIL.firstbit.u32(i32 %x) #0 + ret i32 %ret +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.umad24.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.umad24.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.umad24(i32, i32, i32) #0 +declare i32 @llvm.HSAIL.mad24.u32(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_umad24( +; HSAIL: mad24_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_umad24(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.hsail.umad24(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_mad24_u32( +; HSAIL: mad24_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_mad24_u32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.mad24.u32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.umul24.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.umul24.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.hsail.umul24(i32, i32) #0 +declare i32 @llvm.HSAIL.mul24.u32(i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_mul24_u32( +; HSAIL: mul24_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_mul24_u32(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.hsail.umul24(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_mul24_u32( +; HSAIL: mul24_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_legacy_mul24_u32(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.mul24.u32(i32 %x, i32 %y) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.unpackcvt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.unpackcvt.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.hsail.unpackcvt(i32, i32) #0 +declare float @llvm.HSAIL.unpackcvt.f32.u8x4(i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_unpackcvt_f32_u8x4_0( +; HSAIL: unpackcvt_f32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0; +define float @test_unpackcvt_f32_u8x4_0(i32 %x) #0 { + %val = call float @llvm.hsail.unpackcvt(i32 %x, i32 0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_unpackcvt_f32_u8x4_1( +; HSAIL: unpackcvt_f32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1; +define float @test_unpackcvt_f32_u8x4_1(i32 %x) #0 { + %val = call float @llvm.hsail.unpackcvt(i32 %x, i32 1) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_unpackcvt_f32_u8x4_2( +; HSAIL: unpackcvt_f32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 2; +define float @test_unpackcvt_f32_u8x4_2(i32 %x) #0 { + %val = call float @llvm.hsail.unpackcvt(i32 %x, i32 2) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_unpackcvt_f32_u8x4_3( +; HSAIL: unpackcvt_f32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 3; +define float @test_unpackcvt_f32_u8x4_3(i32 %x) #0 { + %val = call float @llvm.hsail.unpackcvt(i32 %x, i32 3) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_unpackcvt_f32_u8x4_0_imm( +; HSAIL: unpackcvt_f32_u8x4 {{\$s[0-9]+}}, 
u8x4(0,0,48,57), 0; +define float @test_unpackcvt_f32_u8x4_0_imm() #0 { + %val = call float @llvm.hsail.unpackcvt(i32 12345, i32 0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_unpackcvt_f32_u8x4_0( +; HSAIL: unpackcvt_f32_u8x4 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0; +define float @test_legacy_unpackcvt_f32_u8x4_0(i32 %x) #0 { + %val = call float @llvm.HSAIL.unpackcvt.f32.u8x4(i32 %x, i32 0) #0 + ret float %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.hsail.wavebarrier.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.wavebarrier.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &test_wavebarrier( +; HSAIL: wavebarrier; +define void @test_wavebarrier(i32 addrspace(1)* %out) #0 { + call void @llvm.hsail.wavebarrier() #0 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_legacy_wavebarrier( +; HSAIL: wavebarrier; +define void @test_legacy_wavebarrier(i32 addrspace(1)* %out) #1 { + call void @llvm.HSAIL.wavebarrier() #1 + ret void +} + +declare void @llvm.hsail.wavebarrier() #0 +declare void @llvm.HSAIL.wavebarrier() #1 + +attributes #0 = { nounwind noduplicate convergent } +attributes #1 = { nounwind noduplicate } Index: test/CodeGen/HSAIL/llvm.hsail.waveid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.waveid.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.waveid() #0 +declare i32 @llvm.HSAIL.get.dynwave.id() #0 + +; FUNC-LABEL: {{^}}prog function &test_waveid( +; HSAIL: waveid_u32 {{\$s[0-9]+}}; +define void @test_waveid(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.waveid() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_dynwave_id( +; HSAIL: waveid_u32 {{\$s[0-9]+}}; +define void @test_legacy_get_dynwave_id(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.dynwave.id() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.workgroupid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.workgroupid.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.workgroupid(i32) #0 +declare i32 @llvm.HSAIL.get.group.id(i32) #0 + +; FUNC-LABEL: {{^}}prog function &test_workgroupid_0( +; HSAIL: workgroupid_u32 {{\$s[0-9]+}}, 0; +define void @test_workgroupid_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workgroupid(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workgroupid_1( +; HSAIL: workgroupid_u32 {{\$s[0-9]+}}, 1; +define void @test_workgroupid_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workgroupid(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workgroupid_2( +; HSAIL: workgroupid_u32 {{\$s[0-9]+}}, 2; +define void @test_workgroupid_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workgroupid(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: 
{{^}}prog function &test_legacy_get_group_id_0( +; HSAIL: workgroupid_u32 {{\$s[0-9]+}}, 0; +define void @test_legacy_get_group_id_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.group.id(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_group_id_1( +; HSAIL: workgroupid_u32 {{\$s[0-9]+}}, 1; +define void @test_legacy_get_group_id_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.group.id(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_group_id_2( +; HSAIL: workgroupid_u32 {{\$s[0-9]+}}, 2; +define void @test_legacy_get_group_id_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.group.id(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.workgroupsize.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.workgroupsize.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.workgroupsize(i32) #0 +declare i32 @llvm.HSAIL.workgroup.size(i32) #0 + +; FUNC-LABEL: {{^}}prog function &test_workgroupsize_0( +; HSAIL: workgroupsize_u32 {{\$s[0-9]+}}, 0; +define void @test_workgroupsize_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workgroupsize(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workgroupsize_1( +; HSAIL: workgroupsize_u32 {{\$s[0-9]+}}, 1; +define void @test_workgroupsize_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workgroupsize(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workgroupsize_2( +; HSAIL: workgroupsize_u32 {{\$s[0-9]+}}, 2; +define void @test_workgroupsize_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workgroupsize(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &legacy_workgroup_size_0( +; HSAIL: workgroupsize_u32 {{\$s[0-9]+}}, 0; +define void @legacy_workgroup_size_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.workgroup.size(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &legacy_workgroup_size_1( +; HSAIL: workgroupsize_u32 {{\$s[0-9]+}}, 1; +define void @legacy_workgroup_size_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.workgroup.size(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &legacy_workgroup_size_2( +; HSAIL: workgroupsize_u32 {{\$s[0-9]+}}, 2; +define void @legacy_workgroup_size_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.workgroup.size(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.workitemabsid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.workitemabsid.ll @@ -0,0 +1,84 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.workitemabsid.i32(i32) #0 +declare i64 @llvm.hsail.workitemabsid.i64(i32) #0 + +declare i32 
@llvm.HSAIL.get.global.id(i32) #0 + + +; FUNC-LABEL: {{^}}prog function &test_workitemabsid_i32_0 +; HSAIL: workitemabsid_u32 {{\$s[0-9]+}}, 0; +define void @test_workitemabsid_i32_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workitemabsid.i32(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workitemabsid_i32_1 +; HSAIL: workitemabsid_u32 {{\$s[0-9]+}}, 1; +define void @test_workitemabsid_i32_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workitemabsid.i32(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workitemabsid_i32_2 +; HSAIL: workitemabsid_u32 {{\$s[0-9]+}}, 2; +define void @test_workitemabsid_i32_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workitemabsid.i32(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workitemabsid_i64_0 +; HSAIL: workitemabsid_u64 {{\$d[0-9]+}}, 0; +define void @test_workitemabsid_i64_0(i64 addrspace(1)* %out) #1 { + %tmp0 = call i64 @llvm.hsail.workitemabsid.i64(i32 0) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workitemabsid_i64_1 +; HSAIL: workitemabsid_u64 {{\$d[0-9]+}}, 1; +define void @test_workitemabsid_i64_1(i64 addrspace(1)* %out) #1 { + %tmp0 = call i64 @llvm.hsail.workitemabsid.i64(i32 1) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workitemabsid_i64_2 +; HSAIL: workitemabsid_u64 {{\$d[0-9]+}}, 2; +define void @test_workitemabsid_i64_2(i64 addrspace(1)* %out) #1 { + %tmp0 = call i64 @llvm.hsail.workitemabsid.i64(i32 2) #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_workitemabsid_0 +; HSAIL: workitemabsid_u32 {{\$s[0-9]+}}, 0; +define void @test_legacy_workitemabsid_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_workitemabsid_1 +; HSAIL: workitemabsid_u32 {{\$s[0-9]+}}, 1; +define void @test_legacy_workitemabsid_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.global.id(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_workitemabsid_2 +; HSAIL: workitemabsid_u32 {{\$s[0-9]+}}, 2; +define void @test_legacy_workitemabsid_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.global.id(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + + Index: test/CodeGen/HSAIL/llvm.hsail.workitemflatabsid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.workitemflatabsid.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.workitemflatabsid.i32() #0 +declare i64 @llvm.hsail.workitemflatabsid.i64() #0 + +declare i32 @llvm.HSAIL.workitemid.flatabs() #0 + +; FUNC-LABEL: {{^}}prog function &test_workitemflatabsid_i32( +; HSAIL: workitemflatabsid_u32 {{\$s[0-9]+}}; +define void @test_workitemflatabsid_i32(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workitemflatabsid.i32() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function 
&test_workitemflatabsid_i64( +; HSAIL: workitemflatabsid_u64 {{\$d[0-9]+}}; +define void @test_workitemflatabsid_i64(i64 addrspace(1)* %out) #1 { + %tmp0 = call i64 @llvm.hsail.workitemflatabsid.i64() #0 + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_workitemid_flatabs( +; HSAIL: workitemflatabsid_u32 {{\$s[0-9]+}}; +define void @test_legacy_workitemid_flatabs(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.workitemid.flatabs() #1 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.workitemflatid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.workitemflatid.ll @@ -0,0 +1,23 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.workitemflatid() #0 +declare i32 @llvm.HSAIL.workitemid.flat() #0 + +; FUNC-LABEL: {{^}}prog function &test_workitemflatid( +; HSAIL: workitemflatid_u32 {{\$s[0-9]+}}; +define void @test_workitemflatid(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workitemflatid() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_workitemid_flat( +; HSAIL: workitemflatid_u32 {{\$s[0-9]+}}; +define void @test_legacy_workitemid_flat(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.workitemid.flat() #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.hsail.workitemid.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.hsail.workitemid.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.hsail.workitemid(i32) #0 +declare i32 @llvm.HSAIL.get.local.id(i32) #0 + +; FUNC-LABEL: {{^}}prog function &test_workitemid_0( +; HSAIL: workitemid_u32 {{\$s[0-9]+}}, 0; +define void @test_workitemid_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workitemid(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workitemid_1( +; HSAIL: workitemid_u32 {{\$s[0-9]+}}, 1; +define void @test_workitemid_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workitemid(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_workitemid_2( +; HSAIL: workitemid_u32 {{\$s[0-9]+}}, 2; +define void @test_workitemid_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.hsail.workitemid(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_local_id_0( +; HSAIL: workitemid_u32 {{\$s[0-9]+}}, 0; +define void @test_legacy_get_local_id_0(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.local.id(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_local_id_1( +; HSAIL: workitemid_u32 {{\$s[0-9]+}}, 1; +define void @test_legacy_get_local_id_1(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.local.id(i32 1) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_legacy_get_local_id_2( +; HSAIL: workitemid_u32 {{\$s[0-9]+}}, 2; 
+define void @test_legacy_get_local_id_2(i32 addrspace(1)* %out) #1 { + %tmp0 = call i32 @llvm.HSAIL.get.local.id(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.HSAIL.class.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.class.ll @@ -0,0 +1,63 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.class.f32(float, i32) #0 +declare i32 @llvm.HSAIL.class.f64(double, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_class_f32( +; HSAIL: class_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_class_f32(float %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.class.f32(float %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_class_f32_ii( +; HSAIL: class_b1_f32 {{\$c[0-9]+}}, 0F41000000, 9; +define i32 @test_class_f32_ii() #0 { + %val = call i32 @llvm.HSAIL.class.f32(float 8.0, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_class_f32_ri( +; HSAIL: class_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 9; +define i32 @test_class_f32_ri(float %x) #0 { + %val = call i32 @llvm.HSAIL.class.f32(float %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_class_f32_ir( +; HSAIL: class_b1_f32 {{\$c[0-9]+}}, 0F41000000, {{\$s[0-9]+}}; +define i32 @test_class_f32_ir(i32 %y) #0 { + %val = call i32 @llvm.HSAIL.class.f32(float 8.0, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_class_f64( +; HSAIL: class_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_class_f64(double %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.class.f64(double %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_class_f64_ii( +; HSAIL: class_b1_f64 {{\$c[0-9]+}}, 0D4020000000000000, 9; +define i32 @test_class_f64_ii() #0 { + %val = call i32 @llvm.HSAIL.class.f64(double 8.0, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_class_f64_ri( +; HSAIL: class_b1_f64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, 9; +define i32 @test_class_f64_ri(double %x) #0 { + %val = call i32 @llvm.HSAIL.class.f64(double %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_class_f64_ir( +; HSAIL: class_b1_f64 {{\$c[0-9]+}}, 0D4020000000000000, {{\$s[0-9]+}}; +define i32 @test_class_f64_ir(i32 %y) #0 { + %val = call i32 @llvm.HSAIL.class.f64(double 8.0, i32 %y) #0 + ret i32 %val +} + + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.div.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.div.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.HSAIL.div.f32(float, float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_div_f32( +; HSAIL: div_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_div_f32(float %x, float %y) #0 { + %val = call float @llvm.HSAIL.div.f32(float %x, float %y) #0 + ret float %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.fmax3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.fmax3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float 
@llvm.HSAIL.fmax3(float, float, float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_max3_f32( +; HSAIL: gcn_max3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_max3_f32(float %x, float %y, float %z) #0 { + %val = call float @llvm.HSAIL.fmax3(float %x, float %y, float %z) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_f32_rii( +; HSAIL: gcn_max3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F449a4000, 0F40000000; +define float @test_max3_f32_rii(float %x) #0 { + %val = call float @llvm.HSAIL.fmax3(float %x, float 1234.0, float 2.0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_f32_rir( +; HSAIL: gcn_max3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, {{\$s[0-9]+}}; +define float @test_max3_f32_rir(float %x, float %y) #0 { + %val = call float @llvm.HSAIL.fmax3(float %x, float 1.0, float %y) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_f32_rri( +; HSAIL: gcn_max3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F41000000; +define float @test_max3_f32_rri(float %x, float %y) #0 { + %val = call float @llvm.HSAIL.fmax3(float %x, float %y, float 8.0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_f32_iri( +; HSAIL: gcn_max3_f32 {{\$s[0-9]+}}, 0F48a732a0, {{\$s[0-9]+}}, 0F41100000; +define float @test_max3_f32_iri(float %x) #0 { + %val = call float @llvm.HSAIL.fmax3(float 342421.0, float %x, float 9.0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_f32_iir( +; HSAIL: gcn_max3_f32 {{\$s[0-9]+}}, 0F43800000, 0F449a4000, {{\$s[0-9]+}}; +define float @test_max3_f32_iir(float %x) #0 { + %val = call float @llvm.HSAIL.fmax3(float 256.0, float 1234.0, float %x) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_f32_iii( +; HSAIL: gcn_max3_f32 {{\$s[0-9]+}}, 0F40400000, 0F449a4000, 0F41300000; +define float @test_max3_f32_iii() #0 { + %val = call float @llvm.HSAIL.fmax3(float 3.0, float 1234.0, float 11.0) #0 + ret float %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.fmed3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.fmed3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.HSAIL.fmed3(float, float, float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_med3_f32( +; HSAIL: gcn_med3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_med3_f32(float %x, float %y, float %z) #0 { + %val = call float @llvm.HSAIL.fmed3(float %x, float %y, float %z) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_f32_rii( +; HSAIL: gcn_med3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F449a4000, 0F40000000; +define float @test_med3_f32_rii(float %x) #0 { + %val = call float @llvm.HSAIL.fmed3(float %x, float 1234.0, float 2.0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_f32_rir( +; HSAIL: gcn_med3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, {{\$s[0-9]+}}; +define float @test_med3_f32_rir(float %x, float %y) #0 { + %val = call float @llvm.HSAIL.fmed3(float %x, float 1.0, float %y) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_f32_rri( +; HSAIL: gcn_med3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F41000000; +define float @test_med3_f32_rri(float %x, float %y) #0 { + %val = call float @llvm.HSAIL.fmed3(float %x, float %y, float 8.0) #0 + ret float %val +} + +; 
HSAIL-LABEL: {{^}}prog function &test_med3_f32_iri( +; HSAIL: gcn_med3_f32 {{\$s[0-9]+}}, 0F48a732a0, {{\$s[0-9]+}}, 0F41100000; +define float @test_med3_f32_iri(float %x) #0 { + %val = call float @llvm.HSAIL.fmed3(float 342421.0, float %x, float 9.0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_f32_iir( +; HSAIL: gcn_med3_f32 {{\$s[0-9]+}}, 0F43800000, 0F449a4000, {{\$s[0-9]+}}; +define float @test_med3_f32_iir(float %x) #0 { + %val = call float @llvm.HSAIL.fmed3(float 256.0, float 1234.0, float %x) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_f32_iii( +; HSAIL: gcn_med3_f32 {{\$s[0-9]+}}, 0F40400000, 0F449a4000, 0F41300000; +define float @test_med3_f32_iii() #0 { + %val = call float @llvm.HSAIL.fmed3(float 3.0, float 1234.0, float 11.0) #0 + ret float %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.fmin3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.fmin3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.HSAIL.fmin3(float, float, float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_min3_f32( +; HSAIL: gcn_min3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_min3_f32(float %x, float %y, float %z) #0 { + %val = call float @llvm.HSAIL.fmin3(float %x, float %y, float %z) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_f32_rii( +; HSAIL: gcn_min3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F449a4000, 0F40000000; +define float @test_min3_f32_rii(float %x) #0 { + %val = call float @llvm.HSAIL.fmin3(float %x, float 1234.0, float 2.0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_f32_rir( +; HSAIL: gcn_min3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000, {{\$s[0-9]+}}; +define float @test_min3_f32_rir(float %x, float %y) #0 { + %val = call float @llvm.HSAIL.fmin3(float %x, float 1.0, float %y) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_f32_rri( +; HSAIL: gcn_min3_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F41000000; +define float @test_min3_f32_rri(float %x, float %y) #0 { + %val = call float @llvm.HSAIL.fmin3(float %x, float %y, float 8.0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_f32_iri( +; HSAIL: gcn_min3_f32 {{\$s[0-9]+}}, 0F48a732a0, {{\$s[0-9]+}}, 0F41100000; +define float @test_min3_f32_iri(float %x) #0 { + %val = call float @llvm.HSAIL.fmin3(float 342421.0, float %x, float 9.0) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_f32_iir( +; HSAIL: gcn_min3_f32 {{\$s[0-9]+}}, 0F43800000, 0F449a4000, {{\$s[0-9]+}}; +define float @test_min3_f32_iir(float %x) #0 { + %val = call float @llvm.HSAIL.fmin3(float 256.0, float 1234.0, float %x) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_f32_iii( +; HSAIL: gcn_min3_f32 {{\$s[0-9]+}}, 0F40400000, 0F449a4000, 0F41300000; +define float @test_min3_f32_iii() #0 { + %val = call float @llvm.HSAIL.fmin3(float 3.0, float 1234.0, float 11.0) #0 + ret float %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.fract.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.fract.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.HSAIL.fract.f32(float) #0 
+declare double @llvm.HSAIL.fract.f64(double) #0 + +; HSAIL-LABEL: {{^}}prog function &fract_f32( +; HSAIL: fract_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 { + %val = load float, float addrspace(1)* %src, align 4 + %fract = call float @llvm.HSAIL.fract.f32(float %val) #0 + store float %fract, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &fract_f64( +; HSAIL: fract_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @fract_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 { + %val = load double, double addrspace(1)* %src, align 4 + %fract = call double @llvm.HSAIL.fract.f64(double %val) #0 + store double %fract, double addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.HSAIL.ftz.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.ftz.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.HSAIL.ftz.f32(float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_ftz_f32( +; HSAIL: add_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F00000000; +define float @test_ftz_f32(float %x) #0 { + %sqrt = call float @llvm.HSAIL.ftz.f32(float %x) #0 + ret float %sqrt +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.gcn.atomic.append.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.gcn.atomic.append.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.gcn.atomic.append.u32(i32 addrspace(5)*) #1 + +; HSAIL-LABEL: {{^}}prog function &test_atomic_append_u32( +; HSAIL: gcn_atomic_append_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+32]; +define void @test_atomic_append_u32(i32 addrspace(1)* %out, i32 addrspace(5)* %in) #1 { + %gep = getelementptr i32, i32 addrspace(5)* %in, i32 8 + %val = call i32 @llvm.HSAIL.gcn.atomic.append.u32(i32 addrspace(5)* %gep) #1 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.HSAIL.gcn.atomic.consume.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.gcn.atomic.consume.ll @@ -0,0 +1,15 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.gcn.atomic.consume.u32(i32 addrspace(5)*) #1 + +; HSAIL-LABEL: {{^}}prog function &test_atomic_consume_u32( +; HSAIL: gcn_atomic_consume_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+32]; +define void @test_atomic_consume_u32(i32 addrspace(1)* %out, i32 addrspace(5)* %in) #1 { + %gep = getelementptr i32, i32 addrspace(5)* %in, i32 8 + %val = call i32 @llvm.HSAIL.gcn.atomic.consume.u32(i32 addrspace(5)* %gep) #1 + store i32 %val, i32 addrspace(1)* %out + ret void +} + +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.HSAIL.gcn.fldexp.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.gcn.fldexp.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=hsail -mattr=+gcn < %s | FileCheck -check-prefix=HSAIL %s + +declare 
float @llvm.HSAIL.gcn.fldexp.f32(float, i32) #0 +declare double @llvm.HSAIL.gcn.fldexp.f64(double, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_gcn_fldexp_f32_rr( +; HSAIL: gcn_fldexp_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define float @test_gcn_fldexp_f32_rr(float %x, i32 %y) #0 { + %val = call float @llvm.HSAIL.gcn.fldexp.f32(float %x, i32 %y) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_gcn_fldexp_f32_ri( +; HSAIL: gcn_fldexp_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 9; +define float @test_gcn_fldexp_f32_ri(float %x) #0 { + %val = call float @llvm.HSAIL.gcn.fldexp.f32(float %x, i32 9) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_gcn_fldexp_f32_ir( +; HSAIL: gcn_fldexp_f32 {{\$s[0-9]+}}, 0F41200000, {{\$s[0-9]+}}; +define float @test_gcn_fldexp_f32_ir(i32 %y) #0 { + %val = call float @llvm.HSAIL.gcn.fldexp.f32(float 10.0, i32 %y) #0 + ret float %val +} + +; FIXME: This should constant fold +; HSAIL-LABEL: {{^}}prog function &test_gcn_fldexp_f32_ii( +; HSAIL: gcn_fldexp_f32 {{\$s[0-9]+}}, 0F41200000, 8; +define float @test_gcn_fldexp_f32_ii(i32 %y) #0 { + %val = call float @llvm.HSAIL.gcn.fldexp.f32(float 10.0, i32 8) #0 + ret float %val +} + +; HSAIL-LABEL: {{^}}prog function &test_gcn_fldexp_f64_rr( +; HSAIL: gcn_fldexp_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define double @test_gcn_fldexp_f64_rr(double %x, i32 %y) #0 { + %val = call double @llvm.HSAIL.gcn.fldexp.f64(double %x, i32 %y) #0 + ret double %val +} + +; HSAIL-LABEL: {{^}}prog function &test_gcn_fldexp_f64_ri( +; HSAIL: gcn_fldexp_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 9; +define double @test_gcn_fldexp_f64_ri(double %x) #0 { + %val = call double @llvm.HSAIL.gcn.fldexp.f64(double %x, i32 9) #0 + ret double %val +} + +; HSAIL-LABEL: {{^}}prog function &test_gcn_fldexp_f64_ir( +; HSAIL: gcn_fldexp_f64 {{\$d[0-9]+}}, 0D4024000000000000, {{\$s[0-9]+}}; +define double @test_gcn_fldexp_f64_ir(i32 %y) #0 { + %val = call double @llvm.HSAIL.gcn.fldexp.f64(double 10.0, i32 %y) #0 + ret double %val +} + +; FIXME: This should constant fold +; HSAIL-LABEL: {{^}}prog function &test_gcn_fldexp_f64_ii( +; HSAIL: gcn_fldexp_f64 {{\$d[0-9]+}}, 0D4024000000000000, 8; +define double @test_gcn_fldexp_f64_ii(i32 %y) #0 { + %val = call double @llvm.HSAIL.gcn.fldexp.f64(double 10.0, i32 8) #0 + ret double %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.imax3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.imax3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.imax3(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_max3_s32( +; HSAIL: gcn_max3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_max3_s32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.imax3(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_s32_rii( +; HSAIL: gcn_max3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1234, 2; +define i32 @test_max3_s32_rii(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imax3(i32 %x, i32 1234, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_s32_rir( +; HSAIL: gcn_max3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_max3_s32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.imax3(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function 
&test_max3_s32_rri( +; HSAIL: gcn_max3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_max3_s32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.imax3(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_s32_iri( +; HSAIL: gcn_max3_s32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_max3_s32_iri(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imax3(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_s32_iir( +; HSAIL: gcn_max3_s32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_max3_s32_iir(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imax3(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_s32_iii( +; HSAIL: gcn_max3_s32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_max3_s32_iii() #0 { + %val = call i32 @llvm.HSAIL.imax3(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.imed3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.imed3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.imed3(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_med3_s32( +; HSAIL: gcn_med3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_med3_s32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.imed3(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_s32_rii( +; HSAIL: gcn_med3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1234, 2; +define i32 @test_med3_s32_rii(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imed3(i32 %x, i32 1234, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_s32_rir( +; HSAIL: gcn_med3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_med3_s32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.imed3(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_s32_rri( +; HSAIL: gcn_med3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_med3_s32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.imed3(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_s32_iri( +; HSAIL: gcn_med3_s32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_med3_s32_iri(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imed3(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_s32_iir( +; HSAIL: gcn_med3_s32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_med3_s32_iir(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imed3(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_s32_iii( +; HSAIL: gcn_med3_s32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_med3_s32_iii() #0 { + %val = call i32 @llvm.HSAIL.imed3(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.imin3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.imin3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.imin3(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_min3_s32( +; HSAIL: gcn_min3_s32 
{{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_min3_s32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.imin3(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_s32_rii( +; HSAIL: gcn_min3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1234, 2; +define i32 @test_min3_s32_rii(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imin3(i32 %x, i32 1234, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_s32_rir( +; HSAIL: gcn_min3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_min3_s32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.imin3(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_s32_rri( +; HSAIL: gcn_min3_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_min3_s32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.imin3(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_s32_iri( +; HSAIL: gcn_min3_s32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_min3_s32_iri(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imin3(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_s32_iir( +; HSAIL: gcn_min3_s32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_min3_s32_iir(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.imin3(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_s32_iii( +; HSAIL: gcn_min3_s32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_min3_s32_iii() #0 { + %val = call i32 @llvm.HSAIL.imin3(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.ld.kernarg.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.ld.kernarg.ll @@ -0,0 +1,136 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL32 -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 < %s | FileCheck -check-prefix=HSAIL64 -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &test_kernarg_u32( +; HSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]]; +; HSAIL32: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]{{\]}}; + +; HSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]]; +; HSAIL64: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]{{\]}}; +define i32 @test_kernarg_u32() #1 { + %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 0) #0 + ret i32 %tmp0 +} + +; HSAIL-LABEL: {{^}}prog function &test_kernarg_u64( +; HSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]]; +; HSAIL32: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, {{\[}}[[PTR]]{{\]}}; + +; HSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]]; +; HSAIL64: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, {{\[}}[[PTR]]{{\]}}; +define i64 @test_kernarg_u64() #1 { + %tmp0 = call i64 @llvm.HSAIL.ld.kernarg.u64(i32 0) #0 + ret i64 %tmp0 +} + +; HSAIL-LABEL: {{^}}prog function &test_kernarg_u32_offset( +; HSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]]; +; HSAIL32: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+16]; + +; HSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]]; +; HSAIL64: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+16]; +define i32 @test_kernarg_u32_offset() #1 { + %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 16) #0 + ret i32 %tmp0 +} + +; HSAIL-LABEL: {{^}}prog function &test_kernarg_u64_offset( +; HSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]]; +; HSAIL32: 
ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, {{\[}}[[PTR]]+32]; + +; HSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]]; +; HSAIL64: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, {{\[}}[[PTR]]+32]; +define i64 @test_kernarg_u64_offset() #1 { + %tmp0 = call i64 @llvm.HSAIL.ld.kernarg.u64(i32 32) #0 + ret i64 %tmp0 +} + +; XHSAIL-LABEL: {{^}}prog function &test_kernarg_u32_dynamic_offset( +; XHSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]]; +; XHSAIL32: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+16]; + +; XHSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]]; +; XHSAIL64: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+16]; +; define i32 @test_kernarg_u32_dynamic_offset(i32 %off) #1 { +; %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 %off) #0 +; ret i32 %tmp0 +; } + +; XHSAIL-LABEL: {{^}}prog function &test_kernarg_u64_dynamic_offset( +; XHSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]]; +; XHSAIL32: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, {{\[}}[[PTR]]+32]; + +; XHSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]]; +; XHSAIL64: ld_kernarg_align(8)_width(all)_u64 {{\$d[0-9]+}}, {{\[}}[[PTR]]+32]; +; define i64 @test_kernarg_u64_dynamic_offset(i32 %off) #1 { +; %tmp0 = call i64 @llvm.HSAIL.ld.kernarg.u64(i32 %off) #0 +; ret i64 %tmp0 +; } + +; HSAIL-LABEL: {{^}}prog kernel &test_kernarg_invalid_offset_u32( +; HSAIL32: kernargbaseptr_u32 [[PTR:\$s[0-9]+]]; +; HSAIL32: ld_kernarg_align(2)_width(all)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+1234]; + +; HSAIL64: kernargbaseptr_u64 [[PTR:\$d[0-9]+]]; +; HSAIL64: ld_kernarg_align(2)_width(all)_u32 {{\$s[0-9]+}}, {{\[}}[[PTR]]+1234]; +define spir_kernel void @test_kernarg_invalid_offset_u32(i32) #1 { + %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 1234) #0 + store i32 %tmp0, i32 addrspace(1)* undef + ret void +} + +; HSAIL-LABEL: {{^}}prog kernel &test_kernarg_named_arg0_u32( +; HSAIL-NOT: kernargbaseptr +; HSAIL32: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%x]; +; HSAIL64: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%x]; +define spir_kernel void @test_kernarg_named_arg0_u32(i32 %x) #1 { + %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* undef + ret void +} + +; HSAIL-LABEL: {{^}}prog kernel &test_kernarg_anon_arg0_u32( +; HSAIL-NOT: kernargbaseptr +; HSAIL32: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%__arg_p0]; +; HSAIL64: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%__arg_p0]; +define spir_kernel void @test_kernarg_anon_arg0_u32(i32) #1 { + %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 0) #0 + store i32 %tmp0, i32 addrspace(1)* undef + ret void +} + +; HSAIL-LABEL: {{^}}prog kernel &test_kernarg_named_arg1_u32( +; HSAIL-NOT: kernargbaseptr +; HSAIL32: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%y]; +; HSAIL64: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%y]; +define spir_kernel void @test_kernarg_named_arg1_u32(i32 %x, i32 %y) #1 { + %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 4) #0 + store i32 %tmp0, i32 addrspace(1)* undef + ret void +} + +; HSAIL-LABEL: {{^}}prog kernel &test_kernarg_anon_arg1_u32( +; HSAIL-NOT: kernargbaseptr +; HSAIL32: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%__arg_p1]; +; HSAIL64: ld_kernarg_align(4)_width(all)_u32 {{\$s[0-9]+}}, [%__arg_p1]; +define spir_kernel void @test_kernarg_anon_arg1_u32(i32, i32) #1 { + %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 4) #0 + store i32 %tmp0, i32 addrspace(1)* undef + ret void +} + +; HSAIL-LABEL: {{^}}prog kernel &test_kernarg_named_mid_arg1_u32( +; 
HSAIL-NOT: kernargbaseptr +; HSAIL32: ld_kernarg_align(2)_width(all)_u32 {{\$s[0-9]+}}, [%x][2]; +; HSAIL64: ld_kernarg_align(2)_width(all)_u32 {{\$s[0-9]+}}, [%x][2]; +define spir_kernel void @test_kernarg_named_mid_arg1_u32(i32 %x, i32 %y) #1 { + %tmp0 = call i32 @llvm.HSAIL.ld.kernarg.u32(i32 2) #0 + store i32 %tmp0, i32 addrspace(1)* undef + ret void +} + +declare i32 @llvm.HSAIL.ld.kernarg.u32(i32) #0 +declare i64 @llvm.HSAIL.ld.kernarg.u64(i32) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.HSAIL.mad.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.mad.ll @@ -0,0 +1,21 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.mad.u32(i32, i32, i32) #0 +declare i64 @llvm.HSAIL.mad.u64(i64, i64, i64) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_mad_u32( +; HSAIL: mad_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_mad_u32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.mad.u32(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mad_u64( +; HSAIL: mad_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mad_u64(i64 %x, i64 %y, i64 %z) #0 { + %val = call i64 @llvm.HSAIL.mad.u64(i64 %x, i64 %y, i64 %z) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.max.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.max.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.max.u32(i32, i32) #0 +declare i64 @llvm.HSAIL.max.u64(i64, i64) #0 +declare i32 @llvm.HSAIL.max.s32(i32, i32) #0 +declare i64 @llvm.HSAIL.max.s64(i64, i64) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_max_u32( +; HSAIL: max_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_max_u32(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.max.u32(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max_u64( +; HSAIL: max_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_max_u64(i64 %x, i64 %y) #0 { + %val = call i64 @llvm.HSAIL.max.u64(i64 %x, i64 %y) #0 + ret i64 %val +} +; HSAIL-LABEL: {{^}}prog function &test_max_s32( +; HSAIL: max_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_max_s32(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.max.s32(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max_s64( +; HSAIL: max_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_max_s64(i64 %x, i64 %y) #0 { + %val = call i64 @llvm.HSAIL.max.s64(i64 %x, i64 %y) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.min.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.min.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.min.u32(i32, i32) #0 +declare i64 @llvm.HSAIL.min.u64(i64, i64) #0 +declare i32 @llvm.HSAIL.min.s32(i32, i32) #0 +declare i64 @llvm.HSAIL.min.s64(i64, i64) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_min_u32( +; HSAIL: min_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_min_u32(i32 %x, i32 %y) #0 { + 
%val = call i32 @llvm.HSAIL.min.u32(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min_u64( +; HSAIL: min_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_min_u64(i64 %x, i64 %y) #0 { + %val = call i64 @llvm.HSAIL.min.u64(i64 %x, i64 %y) #0 + ret i64 %val +} +; HSAIL-LABEL: {{^}}prog function &test_min_s32( +; HSAIL: min_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_min_s32(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.min.s32(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min_s64( +; HSAIL: min_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_min_s64(i64 %x, i64 %y) #0 { + %val = call i64 @llvm.HSAIL.min.s64(i64 %x, i64 %y) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.mul.ftz.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.mul.ftz.ll @@ -0,0 +1,12 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.HSAIL.mul.ftz.f32(float) #0 + +; HSAIL-LABEL: {{^}}prog function &test_mul_ftz_f32( +; HSAIL: mul_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 0F3f800000; +define float @test_mul_ftz_f32(float %x) #0 { + %sqrt = call float @llvm.HSAIL.mul.ftz.f32(float %x) #0 + ret float %sqrt +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.nullptr.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.nullptr.ll @@ -0,0 +1,59 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL32 -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL64 -check-prefix=HSAIL %s + +declare i8* @llvm.HSAIL.nullptr.private() #0 +declare i8 addrspace(1)* @llvm.HSAIL.nullptr.global.p1i8() #0 +declare i8 addrspace(2)* @llvm.HSAIL.nullptr.readonly.p2i8() #0 +declare i8 addrspace(3)* @llvm.HSAIL.nullptr.group() #0 +declare i8 addrspace(4)* @llvm.HSAIL.nullptr.flat.p4i8() #0 +declare i8 addrspace(7)* @llvm.HSAIL.nullptr.kernarg.p7i8() #0 + + +; HSAIL-LABEL: {{^}}prog function &test_nullptr_private( +; HSAIL: nullptr_private_u32 {{\$s[0-9]+}}; +define i8* @test_nullptr_private() #0 { + %val = call i8* @llvm.HSAIL.nullptr.private() #0 + ret i8* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_nullptr_global( +; HSAIL32: nullptr_u32 {{\$s[0-9]+}}; +; HSAIL64: nullptr_u64 {{\$d[0-9]+}}; +define i8 addrspace(1)* @test_nullptr_global() #0 { + %val = call i8 addrspace(1)* @llvm.HSAIL.nullptr.global.p1i8() #0 + ret i8 addrspace(1)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_nullptr_readonly( +; HSAIL32: nullptr_u32 {{\$s[0-9]+}}; +; HSAIL64: nullptr_u64 {{\$d[0-9]+}}; +define i8 addrspace(2)* @test_nullptr_readonly() #0 { + %val = call i8 addrspace(2)* @llvm.HSAIL.nullptr.readonly.p2i8() #0 + ret i8 addrspace(2)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_nullptr_group( +; HSAIL: nullptr_group_u32 {{\$s[0-9]+}}; +define i8 addrspace(3)* @test_nullptr_group() #0 { + %val = call i8 addrspace(3)* @llvm.HSAIL.nullptr.group() #0 + ret i8 addrspace(3)* %val +} + +; HSAIL-LABEL: {{^}}prog function &test_nullptr_flat( +; HSAIL32: nullptr_u32 {{\$s[0-9]+}}; +; HSAIL64: nullptr_u64 {{\$d[0-9]+}}; +define i8 addrspace(4)* @test_nullptr_flat() #0 { + %val = call i8 addrspace(4)* @llvm.HSAIL.nullptr.flat.p4i8() #0 + ret i8 addrspace(4)* %val +} + + +; HSAIL-LABEL: {{^}}prog 
function &test_nullptr_kernarg( +; HSAIL32: nullptr_kernarg_u32 {{\$s[0-9]+}}; +; HSAIL64: nullptr_kernarg_u64 {{\$d[0-9]+}}; +define i8 addrspace(7)* @test_nullptr_kernarg() #0 { + %val = call i8 addrspace(7)* @llvm.HSAIL.nullptr.kernarg.p7i8() #0 + ret i8 addrspace(7)* %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.sqrt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.sqrt.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare float @llvm.HSAIL.sqrt.ftz.f32(float) #0 +declare double @llvm.HSAIL.sqrt.f64(double) #0 + +; HSAIL-LABEL: {{^}}prog function &hsail_sqrt_ftz_f32( +; HSAIL: sqrt_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @hsail_sqrt_ftz_f32(float addrspace(1)* %out, float addrspace(1)* %src) #1 { + %val = load float, float addrspace(1)* %src, align 4 + %sqrt = call float @llvm.HSAIL.sqrt.ftz.f32(float %val) #0 + store float %sqrt, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &hsail_sqrt_f64( +; HSAIL: sqrt_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @hsail_sqrt_f64(double addrspace(1)* %out, double addrspace(1)* %src) #1 { + %val = load double, double addrspace(1)* %src, align 4 + %sqrt = call double @llvm.HSAIL.sqrt.f64(double %val) #0 + store double %sqrt, double addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/llvm.HSAIL.umax3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.umax3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.umax3(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_max3_u32( +; HSAIL: gcn_max3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_max3_u32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.umax3(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_u32_rii( +; HSAIL: gcn_max3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1234, 2; +define i32 @test_max3_u32_rii(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umax3(i32 %x, i32 1234, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_u32_rir( +; HSAIL: gcn_max3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_max3_u32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.umax3(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_u32_rri( +; HSAIL: gcn_max3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_max3_u32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.umax3(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_u32_iri( +; HSAIL: gcn_max3_u32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_max3_u32_iri(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umax3(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_u32_iir( +; HSAIL: gcn_max3_u32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_max3_u32_iir(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umax3(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_max3_u32_iii( +; HSAIL: gcn_max3_u32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_max3_u32_iii() #0 { + %val = call i32 
@llvm.HSAIL.umax3(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.umed3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.umed3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.umed3(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_med3_u32( +; HSAIL: gcn_med3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_med3_u32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.umed3(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_u32_rii( +; HSAIL: gcn_med3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1234, 2; +define i32 @test_med3_u32_rii(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umed3(i32 %x, i32 1234, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_u32_rir( +; HSAIL: gcn_med3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_med3_u32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.umed3(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_u32_rri( +; HSAIL: gcn_med3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_med3_u32_rri(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.umed3(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_u32_iri( +; HSAIL: gcn_med3_u32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_med3_u32_iri(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umed3(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_u32_iir( +; HSAIL: gcn_med3_u32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_med3_u32_iir(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umed3(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_med3_u32_iii( +; HSAIL: gcn_med3_u32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_med3_u32_iii() #0 { + %val = call i32 @llvm.HSAIL.umed3(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.HSAIL.umin3.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.HSAIL.umin3.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hsail -mattr=+gcn -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.umin3(i32, i32, i32) #0 + +; HSAIL-LABEL: {{^}}prog function &test_min3_u32( +; HSAIL: gcn_min3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_min3_u32(i32 %x, i32 %y, i32 %z) #0 { + %val = call i32 @llvm.HSAIL.umin3(i32 %x, i32 %y, i32 %z) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_u32_rii( +; HSAIL: gcn_min3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1234, 2; +define i32 @test_min3_u32_rii(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umin3(i32 %x, i32 1234, i32 2) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_u32_rir( +; HSAIL: gcn_min3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 1, {{\$s[0-9]+}}; +define i32 @test_min3_u32_rir(i32 %x, i32 %y) #0 { + %val = call i32 @llvm.HSAIL.umin3(i32 %x, i32 1, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_u32_rri( +; HSAIL: gcn_min3_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, 7; +define i32 @test_min3_u32_rri(i32 %x, i32 %y) #0 { + %val = 
call i32 @llvm.HSAIL.umin3(i32 %x, i32 %y, i32 7) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_u32_iri( +; HSAIL: gcn_min3_u32 {{\$s[0-9]+}}, 342421, {{\$s[0-9]+}}, 9; +define i32 @test_min3_u32_iri(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umin3(i32 342421, i32 %x, i32 9) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_u32_iir( +; HSAIL: gcn_min3_u32 {{\$s[0-9]+}}, 256, 65536, {{\$s[0-9]+}}; +define i32 @test_min3_u32_iir(i32 %x) #0 { + %val = call i32 @llvm.HSAIL.umin3(i32 256, i32 65536, i32 %x) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_min3_u32_iii( +; HSAIL: gcn_min3_u32 {{\$s[0-9]+}}, 3, 1234, 11; +define i32 @test_min3_u32_iii() #0 { + %val = call i32 @llvm.HSAIL.umin3(i32 3, i32 1234, i32 11) #0 + ret i32 %val +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/llvm.rint.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/llvm.rint.ll @@ -0,0 +1,87 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &rint_f32( +; HSAIL: rint_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @rint_f32(float addrspace(1)* %out, float %in) #1 { + %tmp = call float @llvm.rint.f32(float %in) #0 + store float %tmp, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &rint_v2f32( +; HSAIL: rint_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +; HSAIL: rint_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) #1 { + %tmp = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0 + store <2 x float> %tmp, <2 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &rint_v4f32( +; HSAIL: rint_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +; HSAIL: rint_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +; HSAIL: rint_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +; HSAIL: rint_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) #1 { + %tmp = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0 + store <4 x float> %tmp, <4 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &rint_f64( +; HSAIL: rint_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @rint_f64(double addrspace(1)* %out, double %in) #1 { + %tmp = call double @llvm.rint.f64(double %in) + store double %tmp, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &rint_v2f64( +; HSAIL: rint_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +; HSAIL: rint_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) #1 { + %tmp = call <2 x double> @llvm.rint.v2f64(<2 x double> %in) + store <2 x double> %tmp, <2 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &rint_v4f64( +; HSAIL: rint_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +; HSAIL: rint_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +; HSAIL: rint_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +; HSAIL: rint_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) #1 { + %tmp = call <4 x double> @llvm.rint.v4f64(<4 x double> %in) + store <4 x double> %tmp, <4 x double> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_rnd_f32( +; HSAIL: rint_ftz_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}} +define void @legacy_hsail_rnd_f32(float addrspace(1)* %out, float %in) #1 { + %tmp = call float @llvm.HSAIL.rnd.f32(float %in) #0 + store 
float %tmp, float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &legacy_hsail_rnd_f64( +; HSAIL: rint_f64 {{\$d[0-9]+}}, {{\$d[0-9]+}} +define void @legacy_hsail_rnd_f64(double addrspace(1)* %out, double %in) #1 { + %tmp = call double @llvm.HSAIL.rnd.f64(double %in) #0 + store double %tmp, double addrspace(1)* %out + ret void +} + +declare float @llvm.HSAIL.rnd.f32(float) #0 +declare double @llvm.HSAIL.rnd.f64(double) #0 + +declare float @llvm.rint.f32(float) #0 +declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0 +declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0 + +declare double @llvm.rint.f64(double) #0 +declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0 +declare <4 x double> @llvm.rint.v4f64(<4 x double>) #0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/load-i1.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/load-i1.ll @@ -0,0 +1,66 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &zextload_i1_to_i32 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_u8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) { + %tmp1 = load i1, i1 addrspace(1)* %in, align 1 + %tmp2 = zext i1 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &zextload_i1_to_i64 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_u8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: cvt_u64_u32 [[RESULT:\$d[0-9]+]], [[VAL]] +; HSAIL: st_global_align(8)_u64 [[RESULT]] +; HSAIL: ret; +define void @zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) { + %tmp1 = load i1, i1 addrspace(1)* %in, align 1 + %tmp2 = zext i1 %tmp1 to i64 + store i64 %tmp2, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &sextload_i1_to_i32 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_u8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: shl_u32 [[TMP0:\$s[0-9]+]], [[VAL]], 31; +; HSAIL: shr_s32 [[TMP1:\$s[0-9]+]], [[TMP0]], 31; +; HSAIL: st_global_align(4)_u32 [[TMP1]] +; HSAIL: ret; +define void @sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) { + %tmp1 = load i1, i1 addrspace(1)* %in, align 1 + %tmp2 = sext i1 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &sextload_i1_to_i64 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_u8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: cvt_u64_u32 [[CVT:\$d[0-9]+]], [[VAL]] +; HSAIL: shl_u64 [[TMP0:\$d[0-9]+]], [[CVT]], 63; +; HSAIL: shr_s64 [[RESULT:\$d[0-9]+]], [[TMP0]], 63; +; HSAIL: st_global_align(8)_u64 [[RESULT]] +; HSAIL: ret; +define void @sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) { + %tmp1 = load i1, i1 addrspace(1)* %in, align 1 + %tmp2 = sext i1 %tmp1 to i64 + store i64 %tmp2, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_i1( +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_u8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_u8 [[VAL]] +; HSAIL: ret; +define void @load_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) { + %tmp1 = load i1, i1 addrspace(1)* %in, align 1 + store i1 %tmp1, i1 addrspace(1)* %out + ret 
void +} Index: test/CodeGen/HSAIL/load.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/load.ll @@ -0,0 +1,445 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +;===------------------------------------------------------------------------===; +; GLOBAL ADDRESS SPACE +;===------------------------------------------------------------------------===; + +; Load an i8 value from the global address space. + +; FUNC-LABEL: {{^}}prog function &load_i8 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_u8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { + %tmp1 = load i8, i8 addrspace(1)* %in + %tmp2 = zext i8 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_i8_sext +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_s8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { + %tmp0 = load i8, i8 addrspace(1)* %in + %tmp1 = sext i8 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v2i8 +define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { + %tmp0 = load <2 x i8>, <2 x i8> addrspace(1)* %in + %tmp1 = zext <2 x i8> %tmp0 to <2 x i32> + store <2 x i32> %tmp1, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v2i8_sext +define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) { + %tmp0 = load <2 x i8>, <2 x i8> addrspace(1)* %in + %tmp1 = sext <2 x i8> %tmp0 to <2 x i32> + store <2 x i32> %tmp1, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v4i8 +define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { + %tmp0 = load <4 x i8>, <4 x i8> addrspace(1)* %in + %tmp1 = zext <4 x i8> %tmp0 to <4 x i32> + store <4 x i32> %tmp1, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v4i8_sext +define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) { + %tmp0 = load <4 x i8>, <4 x i8> addrspace(1)* %in + %tmp1 = sext <4 x i8> %tmp0 to <4 x i32> + store <4 x i32> %tmp1, <4 x i32> addrspace(1)* %out + ret void +} + +; Load an i16 value from the global address space. 
+; FUNC-LABEL: {{^}}prog function &load_i16 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_align(2)_u16 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { + %tmp0 = load i16 , i16 addrspace(1)* %in + %tmp1 = zext i16 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_i16_sext +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_align(2)_s16 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) { + %tmp0 = load i16, i16 addrspace(1)* %in + %tmp1 = sext i16 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v2i16 +define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { + %tmp0 = load <2 x i16>, <2 x i16> addrspace(1)* %in + %tmp1 = zext <2 x i16> %tmp0 to <2 x i32> + store <2 x i32> %tmp1, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v2i16_sext +define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { + %tmp0 = load <2 x i16>, <2 x i16> addrspace(1)* %in + %tmp1 = sext <2 x i16> %tmp0 to <2 x i32> + store <2 x i32> %tmp1, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v4i16 +define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { + %tmp0 = load <4 x i16>, <4 x i16> addrspace(1)* %in + %tmp1 = zext <4 x i16> %tmp0 to <4 x i32> + store <4 x i32> %tmp1, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v4i16_sext +define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { + %tmp0 = load <4 x i16>, <4 x i16> addrspace(1)* %in + %tmp1 = sext <4 x i16> %tmp0 to <4 x i32> + store <4 x i32> %tmp1, <4 x i32> addrspace(1)* %out + ret void +} + +; load an i32 value from the global address space. + +; FUNC-LABEL: {{^}}prog function &load_i32 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_align(4)_u32 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %tmp0 = load i32, i32 addrspace(1)* %in + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; load a f32 value from the global address space. 
+ +; FUNC-LABEL: {{^}}prog function &load_f32 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_align(4)_f32 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_f32 [[VAL]] +; HSAIL: ret; +define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) { + %tmp0 = load float, float addrspace(1)* %in + store float %tmp0, float addrspace(1)* %out + ret void +} + +; load a v2f32 value from the global address space +; FUNC-LABEL: {{^}}prog function &load_v2f32 +define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) { + %tmp0 = load <2 x float>, <2 x float> addrspace(1)* %in + store <2 x float> %tmp0, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_f64 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_align(8)_f64 [[VAL:\$d[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(8)_f64 [[VAL]] +; HSAIL: ret; +define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) { + %tmp0 = load double, double addrspace(1)* %in + store double %tmp0, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_i64 +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_global_align(8)_u64 [[VAL:\$d[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(8)_u64 [[VAL]] +; HSAIL: ret; +define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %tmp0 = load i64, i64 addrspace(1)* %in + store i64 %tmp0, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_i64_sext +define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { + %tmp0 = load i32, i32 addrspace(1)* %in + %tmp1 = sext i32 %tmp0 to i64 + store i64 %tmp1, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_i64_zext +define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { + %tmp0 = load i32, i32 addrspace(1)* %in + %tmp1 = zext i32 %tmp0 to i64 + store i64 %tmp1, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v8i32 +define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) { + %tmp0 = load <8 x i32>, <8 x i32> addrspace(1)* %in + store <8 x i32> %tmp0, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v16i32 +define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) { + %tmp0 = load <16 x i32>, <16 x i32> addrspace(1)* %in + store <16 x i32> %tmp0, <16 x i32> addrspace(1)* %out + ret void +} + +;===------------------------------------------------------------------------===; +; CONSTANT ADDRESS SPACE +;===------------------------------------------------------------------------===; + +; Load a sign-extended i8 value +; FUNC-LABEL: {{^}}prog function &load_const_i8_sext +define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { + %tmp0 = load i8, i8 addrspace(2)* %in + %tmp1 = sext i8 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; Load an aligned i8 value +; FUNC-LABEL: {{^}}prog function &load_const_i8_aligned +define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { + %tmp0 = load i8, i8 addrspace(2)* %in + %tmp1 = zext i8 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; Load an un-aligned i8 value +; FUNC-LABEL: {{^}}prog function &load_const_i8_unaligned +define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) { + 
%tmp0 = getelementptr i8, i8 addrspace(2)* %in, i32 1 + %tmp1 = load i8, i8 addrspace(2)* %tmp0 + %tmp2 = zext i8 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; Load a sign-extended i16 value +; FUNC-LABEL: {{^}}prog function &load_const_i16_sext +define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { + %tmp0 = load i16, i16 addrspace(2)* %in + %tmp1 = sext i16 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; Load an aligned i16 value +; FUNC-LABEL: {{^}}prog function &load_const_i16_aligned +define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { + %tmp0 = load i16, i16 addrspace(2)* %in + %tmp1 = zext i16 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; Load an un-aligned i16 value +; FUNC-LABEL: {{^}}prog function &load_const_i16_unaligned +define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) { + %tmp0 = getelementptr i16, i16 addrspace(2)* %in, i32 1 + %tmp1 = load i16, i16 addrspace(2)* %tmp0 + %tmp2 = zext i16 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; Load an i32 value from the constant address space. + +; FUNC-LABEL: {{^}}prog function &load_const_addrspace_i32 +define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { + %tmp0 = load i32, i32 addrspace(2)* %in + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; Load a f32 value from the constant address space. + +; FUNC-LABEL: {{^}}prog function &load_const_addrspace_f32 +define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) { + %tmp1 = load float, float addrspace(2)* %in + store float %tmp1, float addrspace(1)* %out + ret void +} + +;===------------------------------------------------------------------------===; +; LOCAL ADDRESS SPACE +;===------------------------------------------------------------------------===; + +; Load an i8 value from the local address space. 
+; FUNC-LABEL: {{^}}prog function &load_i8_local +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_group_u8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { + %tmp1 = load i8, i8 addrspace(3)* %in + %tmp2 = zext i8 %tmp1 to i32 + store i32 %tmp2, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_i8_sext_local +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_group_s8 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) { + %tmp0 = load i8, i8 addrspace(3)* %in + %tmp1 = sext i8 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v2i8_local +define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { + %tmp0 = load <2 x i8>, <2 x i8> addrspace(3)* %in + %tmp1 = zext <2 x i8> %tmp0 to <2 x i32> + store <2 x i32> %tmp1, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v2i8_sext_local +define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) { + %tmp0 = load <2 x i8>, <2 x i8> addrspace(3)* %in + %tmp1 = sext <2 x i8> %tmp0 to <2 x i32> + store <2 x i32> %tmp1, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v4i8_local +define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { + %tmp0 = load <4 x i8>, <4 x i8> addrspace(3)* %in + %tmp1 = zext <4 x i8> %tmp0 to <4 x i32> + store <4 x i32> %tmp1, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v4i8_sext_local +define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) { + %tmp0 = load <4 x i8>, <4 x i8> addrspace(3)* %in + %tmp1 = sext <4 x i8> %tmp0 to <4 x i32> + store <4 x i32> %tmp1, <4 x i32> addrspace(1)* %out + ret void +} + +; Load an i16 value from the local address space. 
+; FUNC-LABEL: {{^}}prog function &load_i16_local +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_group_align(2)_u16 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { + %tmp0 = load i16 , i16 addrspace(3)* %in + %tmp1 = zext i16 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_i16_sext_local +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_group_align(2)_s16 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) { + %tmp0 = load i16, i16 addrspace(3)* %in + %tmp1 = sext i16 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v2i16_local +define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { + %tmp0 = load <2 x i16>, <2 x i16> addrspace(3)* %in + %tmp1 = zext <2 x i16> %tmp0 to <2 x i32> + store <2 x i32> %tmp1, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v2i16_sext_local +define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) { + %tmp0 = load <2 x i16>, <2 x i16> addrspace(3)* %in + %tmp1 = sext <2 x i16> %tmp0 to <2 x i32> + store <2 x i32> %tmp1, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v4i16_local +define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { + %tmp0 = load <4 x i16>, <4 x i16> addrspace(3)* %in + %tmp1 = zext <4 x i16> %tmp0 to <4 x i32> + store <4 x i32> %tmp1, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_v4i16_sext_local +define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) { + %tmp0 = load <4 x i16>, <4 x i16> addrspace(3)* %in + %tmp1 = sext <4 x i16> %tmp0 to <4 x i32> + store <4 x i32> %tmp1, <4 x i32> addrspace(1)* %out + ret void +} + +; load an i32 value from the local address space. +; FUNC-LABEL: {{^}}prog function &load_i32_local +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_group_align(4)_u32 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_u32 [[VAL]] +; HSAIL: ret; +define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) { + %tmp0 = load i32, i32 addrspace(3)* %in + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; load a f32 value from the local address space. 
+; FUNC-LABEL: {{^}}prog function &load_f32_local +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_group_align(4)_f32 [[VAL:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(4)_f32 [[VAL]] +; HSAIL: ret; +define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) { + %tmp0 = load float, float addrspace(3)* %in + store float %tmp0, float addrspace(1)* %out + ret void +} + +; load a v2f32 value from the local address space +; FUNC-LABEL: {{^}}prog function &load_v2f32_local +define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) { + %tmp0 = load <2 x float>, <2 x float> addrspace(3)* %in + store <2 x float> %tmp0, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &load_f64_local +; HSAIL: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL: ld_group_align(8)_f64 [[VAL:\$d[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL: st_global_align(8)_f64 [[VAL]] +; HSAIL: ret; +define void @load_f64_local(double addrspace(1)* %out, double addrspace(3)* %in) { + %tmp0 = load double, double addrspace(3)* %in + store double %tmp0, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/local-memory.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/local-memory.ll @@ -0,0 +1,128 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +@local_memory_i32.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 + +; FUNC-LABEL: {{^}}prog function &local_memory_i32 +; HSAIL: group_u32 %local_memory_i32.local_mem[128]; +; HSAIL: st_group_align(4)_u32 {{\$s[0-9]+}}, [%local_memory_i32.local_mem][{{\$s[0-9]+}}]; +; HSAIL: barrier; +; HSAIL: ld_group_align(4)_u32 {{\$s[0-9]+}}, [%local_memory_i32.local_mem][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @local_memory_i32(i32 addrspace(1)* %out) #0 { + %y.i = call i32 @llvm.HSAIL.workitemid.flat() #0 + %arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory_i32.local_mem, i32 0, i32 %y.i + store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4 + %add = add nsw i32 %y.i, 1 + %cmp = icmp eq i32 %add, 16 + %.add = select i1 %cmp, i32 0, i32 %add + call void @llvm.HSAIL.barrier() + %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory_i32.local_mem, i32 0, i32 %.add + %tmp0 = load i32, i32 addrspace(3)* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i + store i32 %tmp0, i32 addrspace(1)* %arrayidx2, align 4 + ret void +} + +@local_memory_i8.local_mem = internal unnamed_addr addrspace(3) global [128 x i8] undef + +; FUNC-LABEL: {{^}}prog function &local_memory_i8 +; HSAIL: align(4) group_u8 %local_memory_i8.local_mem[128]; +; HSAIL: st_group_u8 {{\$s[0-9]+}}, [%local_memory_i8.local_mem][{{\$s[0-9]+}}]; +; HSAIL: barrier; +; HSAIL: ld_group_u8 {{\$s[0-9]+}}, [%local_memory_i8.local_mem][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @local_memory_i8(i8 addrspace(1)* %out) #0 { + %y.i = call i32 @llvm.HSAIL.workitemid.flat() #0 + %arrayidx = getelementptr inbounds [128 x i8], [128 x i8] addrspace(3)* @local_memory_i8.local_mem, i32 0, i32 %y.i + %y.i.trunc = trunc i32 %y.i to i8 + store i8 %y.i.trunc, i8 addrspace(3)* %arrayidx + %add = add nsw i32 %y.i, 1 + %cmp = icmp eq i32 %add, 16 + %.add = select i1 %cmp, i32 0, i32 %add + call void @llvm.HSAIL.barrier() + %arrayidx1 = getelementptr inbounds [128 x i8], [128 x 
i8] addrspace(3)* @local_memory_i8.local_mem, i32 0, i32 %.add + %tmp0 = load i8, i8 addrspace(3)* %arrayidx1 + %arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %out, i32 %y.i + store i8 %tmp0, i8 addrspace(1)* %arrayidx2 + ret void +} + + +@local_memory_i16.local_mem = internal unnamed_addr addrspace(3) global [128 x i16] undef + +; FUNC-LABEL: {{^}}prog function &local_memory_i16 +; HSAIL: align(4) group_u16 %local_memory_i16.local_mem[128]; +; HSAIL: st_group_align(2)_u16 {{\$s[0-9]+}}, [%local_memory_i16.local_mem][{{\$s[0-9]+}}]; +; HSAIL: barrier; +; HSAIL: ld_group_align(2)_u16 {{\$s[0-9]+}}, [%local_memory_i16.local_mem][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @local_memory_i16(i16 addrspace(1)* %out) #0 { + %y.i = call i32 @llvm.HSAIL.workitemid.flat() #0 + %arrayidx = getelementptr inbounds [128 x i16], [128 x i16] addrspace(3)* @local_memory_i16.local_mem, i32 0, i32 %y.i + %y.i.trunc = trunc i32 %y.i to i16 + store i16 %y.i.trunc, i16 addrspace(3)* %arrayidx + %add = add nsw i32 %y.i, 1 + %cmp = icmp eq i32 %add, 16 + %.add = select i1 %cmp, i32 0, i32 %add + call void @llvm.HSAIL.barrier() + %arrayidx1 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(3)* @local_memory_i16.local_mem, i32 0, i32 %.add + %tmp0 = load i16, i16 addrspace(3)* %arrayidx1 + %arrayidx2 = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %y.i + store i16 %tmp0, i16 addrspace(1)* %arrayidx2 + ret void +} + +@local_memory_i64.local_mem = internal unnamed_addr addrspace(3) global [128 x i64] undef + +; FUNC-LABEL: {{^}}prog function &local_memory_i64 +; HSAIL: group_u64 %local_memory_i64.local_mem[128]; +; HSAIL: st_group_align(8)_u64 {{\$d[0-9]+}}, [%local_memory_i64.local_mem][{{\$s[0-9]+}}]; +; HSAIL: barrier; +; HSAIL: ld_group_align(8)_u64 {{\$d[0-9]+}}, [%local_memory_i64.local_mem][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @local_memory_i64(i64 addrspace(1)* %out) #0 { + %y.i = call i32 @llvm.HSAIL.workitemid.flat() #0 + %arrayidx = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @local_memory_i64.local_mem, i32 0, i32 %y.i + %y.i.trunc = sext i32 %y.i to i64 + store i64 %y.i.trunc, i64 addrspace(3)* %arrayidx + %add = add nsw i32 %y.i, 1 + %cmp = icmp eq i32 %add, 16 + %.add = select i1 %cmp, i32 0, i32 %add + call void @llvm.HSAIL.barrier() + %arrayidx1 = getelementptr inbounds [128 x i64], [128 x i64] addrspace(3)* @local_memory_i64.local_mem, i32 0, i32 %.add + %tmp0 = load i64, i64 addrspace(3)* %arrayidx1 + %arrayidx2 = getelementptr inbounds i64, i64 addrspace(1)* %out, i32 %y.i + store i64 %tmp0, i64 addrspace(1)* %arrayidx2 + ret void +} + + +@local_memory_f32.local_mem = internal unnamed_addr addrspace(3) global [128 x float] undef + +; FUNC-LABEL: {{^}}prog function &local_memory_f32 +; HSAIL: group_f32 %local_memory_f32.local_mem[128]; +; HSAIL: st_group_align(4)_f32 {{\$s[0-9]+}}, [%local_memory_f32.local_mem][{{\$s[0-9]+}}]; +; HSAIL: barrier; +; HSAIL: ld_group_align(4)_f32 {{\$s[0-9]+}}, [%local_memory_f32.local_mem][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @local_memory_f32(float addrspace(1)* %out) #0 { + %y.i = call i32 @llvm.HSAIL.workitemid.flat() #0 + %arrayidx = getelementptr inbounds [128 x float], [128 x float] addrspace(3)* @local_memory_f32.local_mem, i32 0, i32 %y.i + %y.i.trunc = uitofp i32 %y.i to float + store float %y.i.trunc, float addrspace(3)* %arrayidx + %add = add nsw i32 %y.i, 1 + %cmp = icmp eq i32 %add, 16 + %.add = select i1 %cmp, i32 0, i32 %add + call void @llvm.HSAIL.barrier() + %arrayidx1 = getelementptr 
inbounds [128 x float], [128 x float] addrspace(3)* @local_memory_f32.local_mem, i32 0, i32 %.add + %tmp0 = load float, float addrspace(3)* %arrayidx1 + %arrayidx2 = getelementptr inbounds float, float addrspace(1)* %out, i32 %y.i + store float %tmp0, float addrspace(1)* %arrayidx2 + ret void +} + +declare i32 @llvm.HSAIL.workitemid.flat() #1 +declare void @llvm.HSAIL.barrier() #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } Index: test/CodeGen/HSAIL/max.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/max.ll @@ -0,0 +1,252 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.get.global.id(i32) #0 + +; HSAIL-LABEL: {{^}}prog function &v_test_imax_sge_i32( +; HSAIL: max_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp sge i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_imax_sge_i32( +; HSAIL-DAG: ld_arg_align(4)_u32 [[A:\$s[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[B:\$s[0-9]+]], [%b]; +; HSAIL: max_s32 {{\$s[0-9]+}}, [[A]], [[B]]; +define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #1 { + %cmp = icmp sge i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_imax_sgt_i32( +; HSAIL: max_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp sgt i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_imax_sgt_i32( +; HSAIL-DAG: ld_arg_align(4)_u32 [[A:\$s[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[B:\$s[0-9]+]], [%b]; +; HSAIL: max_s32 {{\$s[0-9]+}}, [[A]], [[B]]; +define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #1 { + %cmp = icmp sgt i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umax_uge_i32( +; HSAIL: max_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 
+ %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp uge i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_umax_uge_i32( +; HSAIL: max_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #1 { + %cmp = icmp uge i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umax_ugt_i32( +; HSAIL: max_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp ugt i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_umax_ugt_i32 +; HSAIL-DAG: ld_arg_align(4)_u32 [[A:\$s[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[B:\$s[0-9]+]], [%b]; +; HSAIL: max_u32 {{\$s[0-9]+}}, [[A]], [[B]]; +define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #1 { + %cmp = icmp ugt i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umax_ult_i32_multi_use( +; HSAIL-NOT: max_u32 +; HSAIL: cmp +; HSAIL-NEXT: cmov +; HSAIL-NOT: max_u32 +; HSAIL: ret; +define void @v_test_umax_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid + %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp ugt i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep0, align 4 + store i1 %cmp, i1 addrspace(1)* %outgep1 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_imax_sge_i64( +; HSAIL: max_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_test_imax_sge_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp sge i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_imax_sge_i64( +; HSAIL-DAG: ld_arg_align(8)_u64 [[A:\$d[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(8)_u64 [[B:\$d[0-9]+]], [%b]; +; HSAIL: max_s64 {{\$d[0-9]+}}, [[A]], [[B]]; +define void @s_test_imax_sge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #1 { + 
%cmp = icmp sge i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_imax_sgt_i64( +; HSAIL: max_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_test_imax_sgt_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp sgt i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_imax_sgt_i64( +; HSAIL-DAG: ld_arg_align(8)_u64 [[A:\$d[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(8)_u64 [[B:\$d[0-9]+]], [%b]; +; HSAIL: max_s64 {{\$d[0-9]+}}, [[A]], [[B]]; +define void @s_test_imax_sgt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #1 { + %cmp = icmp sgt i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umax_uge_i64( +; HSAIL: max_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_test_umax_uge_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp uge i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_umax_uge_i64( +; HSAIL: max_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @s_test_umax_uge_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #1 { + %cmp = icmp uge i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umax_ugt_i64( +; HSAIL: max_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_test_umax_ugt_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp ugt i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_umax_ugt_i64 +; HSAIL-DAG: ld_arg_align(8)_u64 [[A:\$d[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(8)_u64 [[B:\$d[0-9]+]], [%b]; +; HSAIL: max_u64 {{\$d[0-9]+}}, [[A]], [[B]]; +define void @s_test_umax_ugt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #1 { + %cmp = icmp ugt i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umax_ult_i64_multi_use( +; HSAIL-NOT: max_u64 +; HSAIL: cmp +; HSAIL-NEXT: cmov +; HSAIL-NOT: max_u64 +; HSAIL: ret; +define void 
@v_test_umax_ult_i64_multi_use(i64 addrspace(1)* %out0, i1 addrspace(1)* %out1, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep0 = getelementptr i64, i64 addrspace(1)* %out0, i32 %tid + %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp ugt i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep0 + store i1 %cmp, i1 addrspace(1)* %outgep1 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/min.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/min.ll @@ -0,0 +1,273 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.get.global.id(i32) #0 + +; HSAIL-LABEL: {{^}}prog function &v_test_imin_sle_i32( +; HSAIL: min_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp sle i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_imin_sle_i32( +; HSAIL-DAG: ld_arg_align(4)_u32 [[A:\$s[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[B:\$s[0-9]+]], [%b]; +; HSAIL: min_s32 {{\$s[0-9]+}}, [[A]], [[B]]; +define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #1 { + %cmp = icmp sle i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; FIXME: This is broken because of canonicalization of the le +; comparison with a constant.
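+;
+; Hedged illustration (comment only, not checked by this test): the 'sle' against a
+; constant is assumed to be canonicalized into a strict 'slt' with an adjusted
+; constant, so
+;   %cmp = icmp sle i32 %a, 8
+;   %val = select i1 %cmp, i32 %a, i32 8
+; is assumed to become
+;   %cmp = icmp slt i32 %a, 9
+;   %val = select i1 %cmp, i32 %a, i32 8
+; and the mismatched constants (compare on 9, select of 8) keep the min pattern from
+; forming, which matches the cmp/cmov sequence expected below instead of min_s32.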
+ +; HSAIL-LABEL: {{^}}prog function &test_imin_imm_sle_i32( +; HSAIL: cmp_lt_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 9; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, {{\$s[0-9]+}}, 8; +; XHSAIL: ld_arg_align(4)_u32 [[A:\$s[0-9]+]], [%a]; +; XHSAIL: min_s32 {{\$s[0-9]+}}, [[A]], 8; +define void @test_imin_imm_sle_i32(i32 addrspace(1)* %out, i32 %a) #1 { + %cmp = icmp sle i32 %a, 8 + %val = select i1 %cmp, i32 %a, i32 8 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_imin_imm_slt_i32( +; HSAIL: ld_arg_align(4)_u32 [[A:\$s[0-9]+]], [%a]; +; HSAIL: min_s32 {{\$s[0-9]+}}, [[A]], 8; +define void @test_imin_imm_slt_i32(i32 addrspace(1)* %out, i32 %a) #1 { + %cmp = icmp slt i32 %a, 8 + %val = select i1 %cmp, i32 %a, i32 8 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_imin_slt_i32( +; HSAIL: min_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp slt i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_imin_slt_i32( +; HSAIL-DAG: ld_arg_align(4)_u32 [[A:\$s[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[B:\$s[0-9]+]], [%b]; +; HSAIL: min_s32 {{\$s[0-9]+}}, [[A]], [[B]]; +define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #1 { + %cmp = icmp slt i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umin_ule_i32( +; HSAIL: min_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp ule i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_umin_ule_i32( +; HSAIL: min_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #1 { + %cmp = icmp ule i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umin_ult_i32( +; HSAIL: min_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 
addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp ult i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_umin_ult_i32( +; HSAIL: min_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) #1 { + %cmp = icmp ult i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umin_ult_i32_multi_use( +; HSAIL-NOT: min_u32 +; HSAIL: cmp +; HSAIL-NEXT: cmov +; HSAIL-NOT: min_u32 +; HSAIL: ret; +define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid + %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0, align 4 + %b = load i32, i32 addrspace(1)* %gep1, align 4 + %cmp = icmp ult i32 %a, %b + %val = select i1 %cmp, i32 %a, i32 %b + store i32 %val, i32 addrspace(1)* %outgep0, align 4 + store i1 %cmp, i1 addrspace(1)* %outgep1 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_imin_sle_i64( +; HSAIL: min_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_test_imin_sle_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp sle i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_imin_sle_i64( +; HSAIL-DAG: ld_arg_align(8)_u64 [[A:\$d[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(8)_u64 [[B:\$d[0-9]+]], [%b]; +; HSAIL: min_s64 {{\$d[0-9]+}}, [[A]], [[B]]; +define void @s_test_imin_sle_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #1 { + %cmp = icmp sle i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_imin_slt_i64( +; HSAIL: min_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_test_imin_slt_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp slt i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_imin_slt_i64( +; HSAIL-DAG: ld_arg_align(8)_u64 [[A:\$d[0-9]+]], [%a]; +; HSAIL-DAG: ld_arg_align(8)_u64 [[B:\$d[0-9]+]], [%b]; +; HSAIL: min_s64 {{\$d[0-9]+}}, [[A]], [[B]]; +define void @s_test_imin_slt_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #1 { + %cmp 
= icmp slt i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umin_ule_i64( +; HSAIL: min_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_test_umin_ule_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp ule i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_umin_ule_i64( +; HSAIL: min_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @s_test_umin_ule_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #1 { + %cmp = icmp ule i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umin_ult_i64( +; HSAIL: min_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_test_umin_ult_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp ult i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_test_umin_ult_i64( +; HSAIL: min_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @s_test_umin_ult_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) #1 { + %cmp = icmp ult i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &v_test_umin_ult_i64_multi_use( +; HSAIL-NOT: min_u64 +; HSAIL: cmp +; HSAIL-NEXT: cmov +; HSAIL-NOT: min_u64 +; HSAIL: ret; +define void @v_test_umin_ult_i64_multi_use(i64 addrspace(1)* %out0, i1 addrspace(1)* %out1, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) #1 { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) #0 + %gep0 = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid + %outgep0 = getelementptr i64, i64 addrspace(1)* %out0, i32 %tid + %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid + %a = load i64, i64 addrspace(1)* %gep0 + %b = load i64, i64 addrspace(1)* %gep1 + %cmp = icmp ult i64 %a, %b + %val = select i1 %cmp, i64 %a, i64 %b + store i64 %val, i64 addrspace(1)* %outgep0 + store i1 %cmp, i1 addrspace(1)* %outgep1 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/module-statement.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/module-statement.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=DEFAULT32 -check-prefix=ALL %s +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=DEFAULT64 -check-prefix=ALL %s +; RUN: llc 
-march=hsail64 -mtriple=x86_64-apple-darwin < %s | FileCheck -strict-whitespace -check-prefix=DEFAULT64 -check-prefix=ALL %s + +; ALL-NOT: .text +; ALL-NOT: .macosx_version_min + +; DEFAULT32: {{^}}module &__llvm_hsail_module:1:0:$full:$small:$near; +; DEFAULT64: {{^}}module &__llvm_hsail_module:1:0:$full:$large:$near; + +define void @empty_func() { + ret void +} Index: test/CodeGen/HSAIL/mul.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/mul.ll @@ -0,0 +1,165 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_mul_v2i32 +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1) * %in + %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr + %result = mul <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_mul_v4i32 +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1) * %in + %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr + %result = mul <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}prog function &s_trunc_i64_mul_to_i32 +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { + %mul = mul i64 %b, %a + %trunc = trunc i64 %mul to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL:{{^}}prog function &v_trunc_i64_mul_to_i32 +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { + %a = load i64, i64 addrspace(1)* %aptr, align 8 + %b = load i64, i64 addrspace(1)* %bptr, align 8 + %mul = mul i64 %b, %a + %trunc = trunc i64 %mul to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL:{{^}}prog function &mul64_sext_c +; HSAIL: cvt_s64_s32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 80; +define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) { +entry: + %0 = sext i32 %in to i64 + %1 = mul i64 %0, 80 + store i64 %1, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_mul64_sext_c +; HSAIL: cvt_s64_s32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 80; +define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ext = sext i32 %val to i64 + %mul = mul i64 %ext, 80 + store i64 %mul, i64 addrspace(1)* %out, align 8 + ret void +} + + +; FUNC-LABEL: {{^}}prog function &v_mul64_sext_inline_imm +; HSAIL: cvt_s64_s32 
{{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 9; +define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) { + %val = load i32, i32 addrspace(1)* %in, align 4 + %ext = sext i32 %val to i64 + %mul = mul i64 %ext, 9 + store i64 %mul, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &s_mul_i32 +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %mul = mul i32 %a, %b + store i32 %mul, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_mul_i32 +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = mul i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &s_mul_i64 +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind { + %mul = mul i64 %a, %b + store i64 %mul, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_mul_i64 +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) { + %a = load i64, i64 addrspace(1)* %aptr, align 8 + %b = load i64, i64 addrspace(1)* %bptr, align 8 + %mul = mul i64 %a, %b + store i64 %mul, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &mul32_in_branch +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL: mul_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @mul32_in_branch(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b, i32 %c) { +entry: + %0 = icmp eq i32 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i32, i32 addrspace(1)* %in + br label %endif + +else: + %2 = mul i32 %a, %b + br label %endif + +endif: + %3 = phi i32 [%1, %if], [%2, %else] + store i32 %3, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &mul64_in_branch +; HSAIL: cmp_eq_b1_s64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, 0; +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @mul64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = load i64, i64 addrspace(1)* %in + br label %endif + +else: + %2 = mul i64 %a, %b + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/mulhs.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/mulhs.ll @@ -0,0 +1,26 @@ +; XFAIL: * +; This is failing because DAGCombiner decides it is profitable to +; replace 32-bit mulhs with extend and do 64-bit multiply. 
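+;
+; Hedged sketch of the combine described above (assumed, for context only): with a
+; legal 64-bit multiply, the 32-bit signed high-half multiply is assumed to be
+; rewritten roughly as
+;   %x64 = sext i32 %x to i64
+;   %y64 = sext i32 %y to i64
+;   %prod = mul i64 %x64, %y64
+;   %hi64 = lshr i64 %prod, 32
+;   %hi = trunc i64 %hi64 to i32
+; leaving no 32-bit high-multiply node for the checks below to match.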
+ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.mulhi.s32(i32, i32) #0 +declare i64 @llvm.HSAIL.mulhi.s64(i64, i64) #0 + + +; HSAIL-LABEL: {{^}}prog function &test_mulhs_i32( +; HSAIL: mulhi_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_mulhs_i32(i32 %x, i32 %y) #1 { + %val = call i32 @llvm.HSAIL.mulhi.s32(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mulhs_i64( +; HSAIL: mulhi_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mulhs_i64(i64 %x, i64 %y) #1 { + %val = call i64 @llvm.HSAIL.mulhi.s64(i64 %x, i64 %y) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/mulhu.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/mulhu.ll @@ -0,0 +1,26 @@ +; XFAIL: * + +; This is failing because DAGCombiner decides it is profitable to +; replace 32-bit mulhu with extend and do 64-bit multiply. + +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +declare i32 @llvm.HSAIL.mulhi.u32(i32, i32) #0 +declare i64 @llvm.HSAIL.mulhi.u64(i64, i64) #0 + +; HSAIL-LABEL: {{^}}prog function &test_mulhu_i32( +; HSAIL: mulhi_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define i32 @test_mulhu_i32(i32 %x, i32 %y) #1 { + %val = call i32 @llvm.HSAIL.mulhi.u32(i32 %x, i32 %y) #0 + ret i32 %val +} + +; HSAIL-LABEL: {{^}}prog function &test_mulhu_i64( +; HSAIL: mulhi_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define i64 @test_mulhu_i64(i64 %x, i64 %y) #1 { + %val = call i64 @llvm.HSAIL.mulhi.u64(i64 %x, i64 %y) #0 + ret i64 %val +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } Index: test/CodeGen/HSAIL/noinline.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/noinline.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL: {{^}}prog function &do_not_inline( +define i32 @do_not_inline() nounwind noinline { + ret i32 123 +} + +; HSAIL: {{^}}prog function &caller()(arg_u32 %out) +; HSAIL: call &do_not_inline (%do_not_inline) (); +define void @caller(i32 addrspace(1)* %out) nounwind { + %x = call i32 @do_not_inline() nounwind noinline + store i32 %x, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/or.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/or.ll @@ -0,0 +1,141 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &or_v2i32 +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1) * %in + %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr + %result = or <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &or_v4i32 +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x 
i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1) * %in + %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr + %result = or <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &scalar_or_i32 +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %or = or i32 %a, %b + store i32 %or, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_or_i32 +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) { + %loada = load i32, i32 addrspace(1)* %a + %or = or i32 %loada, %b + store i32 %or, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &scalar_or_literal_i32 +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 99999; +define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) { + %or = or i32 %a, 99999 + store i32 %or, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_or_literal_i32 +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 65535; +define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { + %loada = load i32, i32 addrspace(1)* %a, align 4 + %or = or i32 %loada, 65535 + store i32 %or, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_or_inline_immediate_i32 +; HSAIL: or_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 4; +define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { + %loada = load i32, i32 addrspace(1)* %a, align 4 + %or = or i32 %loada, 4 + store i32 %or, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}prog function &scalar_or_i64 +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { + %or = or i64 %a, %b + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_or_i64 +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64, i64 addrspace(1)* %a, align 8 + %loadb = load i64, i64 addrspace(1)* %b, align 8 + %or = or i64 %loada, %loadb + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &scalar_vector_or_i64 +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) { + %loada = load i64, i64 addrspace(1)* %a + %or = or i64 %loada, %b + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_or_i64_loadimm +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 22470723082367; +define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64, i64 addrspace(1)* %a, align 8 + %or = or i64 %loada, 22470723082367 + store i64 %or, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_or_i64_imm +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 8; +define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { + %loada = load i64, i64 addrspace(1)* %a, align 8 + %or = or i64 %loada, 8 + store i64 %or, i64 
addrspace(1)* %out + ret void +} + +; FUNC-LABEL:{{^}}prog function &trunc_i64_or_to_i32 +; HSAIL: or_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: cvt_u32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { + %add = or i64 %b, %a + %trunc = trunc i64 %add to i32 + store i32 %trunc, i32 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL:{{^}}prog function &or_i1 +; HSAIL: cmp_ge_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000; +; HSAIL: cmp_ge_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000; +; HSAIL: or_b1 {{\$c[0-9]+}}, {{\$c[0-9]+}}, {{\$c[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { + %a = load float, float addrspace(1) * %in0 + %b = load float, float addrspace(1) * %in1 + %acmp = fcmp oge float %a, 0.000000e+00 + %bcmp = fcmp oge float %b, 0.000000e+00 + %or = or i1 %acmp, %bcmp + %result = select i1 %or, float %a, float %b + store float %result, float addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/pack_opt.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/pack_opt.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &test_pack_opt( +; HSAIL: ld_arg_align(4)_u32 [[SRC:\$s[0-9]+]], [%src]; +; HSAIL: pack_u32x2_u32 [[DEST:\$d[0-9]+]], u32x2(0,0), [[SRC]], 1; +; HSAIL: st_arg_align(8)_u64 [[DEST]] +define i64 @test_pack_opt(i32 %src) #0 { + %tmp1 = zext i32 %src to i64 + %result = shl i64 %tmp1, 32 + ret i64 %result +} + +attributes #0 = { nounwind readnone } Index: test/CodeGen/HSAIL/private-memory.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/private-memory.ll @@ -0,0 +1,411 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=OPT %s +; RUN: llc -march=hsail64 < %s | FileCheck -check-prefix=HSAIL -check-prefix=OPT %s +; RUN: llc -O0 -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=SPILL -check-prefix=SPILL32 %s +; RUN: llc -O0 -march=hsail64 < %s | FileCheck -check-prefix=HSAIL -check-prefix=SPILL -check-prefix=SPILL64 %s + +declare i32 @llvm.HSAIL.get.global.id(i32) nounwind readnone + +; HSAIL-LABEL: {{^}}prog function &mova_same_clause()( +; HSAIL: align(4) private_u8 %__privateStack[20]; +; HSAIL-DAG: st_private_align(4)_u32 4, [%__privateStack][{{\$s[0-9]+}}]; +; HSAIL-DAG: st_private_align(4)_u32 4, [%__privateStack][{{\$s[0-9]+}}]; +; HSAIL-DAG: ld_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack]; +; HSAIL-DAG: ld_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack][4]; +; HSAIL: ret; +define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { +entry: + %stack = alloca [5 x i32], align 4 + %0 = load i32, i32 addrspace(1)* %in, align 4 + %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0 + store i32 4, i32* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 + %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1 + store i32 5, i32* %arrayidx3, align 4 + %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0 + %2 = load i32, i32* %arrayidx10, align 4 + store i32 %2, i32 
addrspace(1)* %out, align 4 + %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1 + %3 = load i32, i32* %arrayidx12 + %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1 + store i32 %3, i32 addrspace(1)* %arrayidx13 + ret void +} + +%struct.point = type { i32, i32 } + +; HSAIL-LABEL: {{^}}prog function &multiple_structs()( +; HSAIL: align(8) private_u8 %__privateStack[16]; + +; HSAIL-DAG: st_private_align(8)_u32 0, [%__privateStack]; +; HSAIL-DAG: st_private_align(4)_u32 1, [%__privateStack][4]; +; HSAIL-DAG: st_private_align(8)_u64 12884901890, [%__privateStack][8]; +; HSAIL-DAG: ld_private_align(8)_u32 {{\$s[0-9]+}}, [%__privateStack][8]; +; HSAIL-DAG: ld_private_align(8)_u32 {{\$s[0-9]+}}, [%__privateStack]; +; HSAIL: ret; +define void @multiple_structs(i32 addrspace(1)* %out) { +entry: + %a = alloca %struct.point + %b = alloca %struct.point + %a.x.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0 + %a.y.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 1 + %b.x.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0 + %b.y.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 1 + store i32 0, i32* %a.x.ptr + store i32 1, i32* %a.y.ptr + store i32 2, i32* %b.x.ptr + store i32 3, i32* %b.y.ptr + %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0 + %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0 + %a.indirect = load i32, i32* %a.indirect.ptr + %b.indirect = load i32, i32* %b.indirect.ptr + %0 = add i32 %a.indirect, %b.indirect + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &direct_loop()( +; HSAIL: align(4) private_u8 %__privateStack[16]; +; SPILL32: align(4) spill_u8 %__spillStack[8]; +; SPILL64: align(8) spill_u8 %__spillStack[12]; + +; HSAIL: st_private_align(4)_u64 {{\$d[0-9]+}}, [%__privateStack]; +; SPILL32-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack]; +; SPILL32-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][4]; +; SPILL64-DAG: st_spill_align(8)_u64 {{\$d[0-9]+}}, [%__spillStack]; +; SPILL64-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][8]; + +; HSAIL: @BB2_1: +; SPILL32-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][4]; +; SPILL64-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][8]; +; HSAIL-DAG: ld_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack]; +; HSAIL-DAG: st_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack][8]; +; HSAIL: cbr + +; HSAIL-DAG: ld_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack][8]; +; SPILL32-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack]; +; SPILL64-DAG: ld_spill_align(8)_u64 {{\$d[0-9]+}}, [%__spillStack]; +; HSAIL: ret; +define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { +entry: + %prv_array_const = alloca [2 x i32] + %prv_array = alloca [2 x i32] + %a = load i32, i32 addrspace(1)* %in + %b_src_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %b = load i32, i32 addrspace(1)* %b_src_ptr + %a_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0 + store i32 %a, i32* %a_dst_ptr + %b_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1 + store i32 %b, i32* %b_dst_ptr + br label %for.body + +for.body: + %inc = phi i32 [0, %entry], [%count, %for.body] + %x_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0 + %x = load i32, i32* %x_ptr + %y_ptr = getelementptr [2 x i32], [2 x i32]* 
%prv_array, i32 0, i32 0 + %y = load i32, i32* %y_ptr + %xy = add i32 %x, %y + store i32 %xy, i32* %y_ptr + %count = add i32 %inc, 1 + %done = icmp eq i32 %count, 4095 + br i1 %done, label %for.end, label %for.body + +for.end: + %value_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0 + %value = load i32, i32* %value_ptr + store i32 %value, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &short_array()( +; HSAIL: align(4) private_u8 %__privateStack[4]; + +; HSAIL: st_private_align(2)_u32 65536, [%__privateStack]; +; HSAIL: ld_private_align(2)_s16 {{\$s[0-9]+}}, [%__privateStack][$s0]; +; HSAIL: ret; +define void @short_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %0 = alloca [2 x i16] + %1 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 0 + %2 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 1 + store i16 0, i16* %1 + store i16 1, i16* %2 + %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index + %4 = load i16, i16* %3 + %5 = sext i16 %4 to i32 + store i32 %5, i32 addrspace(1)* %out + ret void +} + +; FIXME: This should not be overriding the alignment +; HSAIL-LABEL: {{^}}prog function &align1_short_array()( +; HSAIL: align(4) private_u8 %__privateStack[4]; +; HSAIL: ret; +define void @align1_short_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %0 = alloca [2 x i16], align 1 + %1 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 0 + %2 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 1 + store i16 0, i16* %1 + store i16 1, i16* %2 + %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index + %4 = load i16, i16* %3 + %5 = sext i16 %4 to i32 + store i32 %5, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &align32_short_array()( +; HSAIL: align(32) private_u8 %__privateStack[4]; +; HSAIL: ret; +define void @align32_short_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %0 = alloca [2 x i16], align 32 + %1 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 0 + %2 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 1 + store i16 0, i16* %1 + store i16 1, i16* %2 + %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index + %4 = load i16, i16* %3 + %5 = sext i16 %4 to i32 + store i32 %5, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &char_array()( +; HSAIL: align(4) private_u8 %__privateStack[2]; +; HSAIL: st_private_u16 256, [%__privateStack]; +; HSAIL: ld_private_s8 $s0, [%__privateStack][$s0]; +; HSAIL: ret; +define void @char_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %0 = alloca [2 x i8] + %1 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 0 + %2 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 1 + store i8 0, i8* %1 + store i8 1, i8* %2 + %3 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 %index + %4 = load i8, i8* %3 + %5 = sext i8 %4 to i32 + store i32 %5, i32 addrspace(1)* %out + ret void + +} + +; HSAIL-LABEL: {{^}}prog function &work_item_info()( +; HSAIL: align(4) private_u8 %__privateStack[8]; + +; HSAIL: st_private_align(4)_u64 4294967296, [%__privateStack]; +; HSAIL: ld_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack][$s0]; +; HSAIL: ret; +define void @work_item_info(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = alloca [2 x i32] + %1 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 0 + %2 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 1 + store i32 0, i32* %1 + store i32 1, i32* %2 + %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in + %4 = load i32, i32* %3 + %5 = call i32 
@llvm.HSAIL.get.global.id(i32 0) + %6 = add i32 %4, %5 + store i32 %6, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &no_overlap()( +; HSAIL: align(4) private_u8 %__privateStack[5]; + + +; HSAIL-DAG: st_private_u8 0, [%__privateStack]; +; HSAIL-DAG: st_private_u8 1, [%__privateStack][1]; +; HSAIL-DAG: st_private_u8 2, [%__privateStack][2]; +; HSAIL-DAG: st_private_u16 1, [%__privateStack][3]; + +; HSAIL-DAG: ld_private_u8 {{\$s[0-9]+}}, [%__privateStack][{{\$s[0-9]+}}+3]; +; HSAIL-DAG: ld_private_u8 {{\$s[0-9]+}}, [%__privateStack][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @no_overlap(i32 addrspace(1)* %out, i32 %in) { +entry: + %0 = alloca [3 x i8], align 1 + %1 = alloca [2 x i8], align 1 + %2 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 0 + %3 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 1 + %4 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 2 + %5 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 0 + %6 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 1 + store i8 0, i8* %2 + store i8 1, i8* %3 + store i8 2, i8* %4 + store i8 1, i8* %5 + store i8 0, i8* %6 + %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in + %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in + %9 = load i8, i8* %7 + %10 = load i8, i8* %8 + %11 = add i8 %9, %10 + %12 = sext i8 %11 to i32 + store i32 %12, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &char_array_array()( +; HSAIL: align(4) private_u8 %__privateStack[4]; +; HSAIL: st_private_u16 256, [%__privateStack]; +; HSAIL: ld_private_s8 $s0, [%__privateStack][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @char_array_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x [2 x i8]] + %gep0 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 0 + %gep1 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 1 + store i8 0, i8* %gep0 + store i8 1, i8* %gep1 + %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index + %load = load i8, i8* %gep2 + %sext = sext i8 %load to i32 + store i32 %sext, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i32_array_array()( +; HSAIL: align(4) private_u8 %__privateStack[16]; +define void @i32_array_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x [2 x i32]] + %gep0 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 0 + %gep1 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 1 + store i32 0, i32* %gep0 + store i32 1, i32* %gep1 + %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index + %load = load i32, i32* %gep2 + store i32 %load, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &i64_array_array()( +; HSAIL: align(8) private_u8 %__privateStack[32]; +define void @i64_array_array(i64 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x [2 x i64]] + %gep0 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 0 + %gep1 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 1 + store i64 0, i64* %gep0 + store i64 1, i64* %gep1 + %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index + %load = load i64, i64* %gep2 + store i64 %load, i64 addrspace(1)* %out + ret void +} + +%struct.pair32 = type { i32, i32 } + +; HSAIL-LABEL: {{^}}prog function &struct_array_array()( +; HSAIL: align(8) private_u8 
%__privateStack[32]; + +; HSAIL-DAG: st_private_align(4)_u32 0, [%__privateStack][4]; +; HSAIL-DAG: st_private_align(4)_u32 1, [%__privateStack][12]; +; HSAIL: ld_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @struct_array_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x [2 x %struct.pair32]] + %gep0 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 0, i32 1 + %gep1 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 1, i32 1 + store i32 0, i32* %gep0 + store i32 1, i32* %gep1 + %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0 + %load = load i32, i32* %gep2 + store i32 %load, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &struct_pair32_array()( +; HSAIL: align(8) private_u8 %__privateStack[16]; + +; HSAIL: st_private_align(4)_u64 4294967296, [%__privateStack][4]; +; HSAIL-DAG: ld_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack][{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [2 x %struct.pair32] + %gep0 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 0, i32 1 + %gep1 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 1, i32 0 + store i32 0, i32* %gep0 + store i32 1, i32* %gep1 + %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0 + %load = load i32, i32* %gep2 + store i32 %load, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &select_private()( +; HSAIL: align(4) private_u8 %__privateStack[8]; + +; HSAIL: st_private_align(4)_u64 4294967296, [%__privateStack]; +; HSAIL: ld_private_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}]; +define void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind { +entry: + %tmp = alloca [2 x i32] + %tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0 + %tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1 + store i32 0, i32* %tmp1 + store i32 1, i32* %tmp2 + %cmp = icmp eq i32 %in, 0 + %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2 + %load = load i32, i32* %sel + store i32 %load, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &ptrtoint_private()( +; HSAIL: align(4) private_u8 %__privateStack[64]; +; HSAIL: st_private_align(4)_u32 5, [%__privateStack][{{\$s[0-9]+}}]; +; HSAIL: ld_private_align(4)_u32 $s0, [%__privateStack][{{\$s[0-9]+}}+5]; +; HSAIL: ret; +define void @ptrtoint_private(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %alloca = alloca [16 x i32] + %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %a + store i32 5, i32* %tmp0 + %tmp1 = ptrtoint [16 x i32]* %alloca to i32 + %tmp2 = add i32 %tmp1, 5 + %tmp3 = inttoptr i32 %tmp2 to i32* + %tmp4 = getelementptr i32, i32* %tmp3, i32 %b + %tmp5 = load i32, i32* %tmp4 + store i32 %tmp5, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &reg_and_imm_offset()( +; HSAIL: align(4) private_u8 %__privateStack[64]; + +; HSAIL-DAG: st_private_align(4)_u32 3, [%__privateStack]; +; HSAIL-DAG: st_private_align(4)_u32 1, [%__privateStack][12]; +; HSAIL-DAG: st_private_align(4)_u32 2, [%__privateStack][32]; +; HSAIL-DAG: shl_u32 [[PTR:\$s[0-9]+]] +; HSAIL-DAG: st_private_align(4)_u32 7, [%__privateStack]{{\[}}[[PTR]]{{\]}}; +; HSAIL-DAG: 
st_private_align(4)_u32 9, [%__privateStack]{{\[}}[[PTR]]+8]; + +; HSAIL: ld_private_align(4)_u32 {{\$s[0-9]+}}, [%__privateStack]{{\[}}[[PTR]]+12]; +; HSAIL: ret; +define void @reg_and_imm_offset(i32 addrspace(1)* %out, i32 %index) { +entry: + %alloca = alloca [16 x i32] + %gep0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0 + %gep1 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 3 + %gep2 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 8 + %gep3 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %index + %gep3.sum1 = add i32 %index, 2 + %gep4 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %gep3.sum1 + store i32 3, i32* %gep0 + store i32 1, i32* %gep1 + store i32 2, i32* %gep2 + store i32 7, i32* %gep3 + store i32 9, i32* %gep4 + %gep3.sum = add i32 %index, 3 + %gep5 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 %gep3.sum + %load = load i32, i32* %gep5 + store i32 %load, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/ptrtoint-constantexpr-initializer-broken.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ptrtoint-constantexpr-initializer-broken.ll @@ -0,0 +1,13 @@ +; XFAIL: * +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL32 -check-prefix=HSAIL %s +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL64 -check-prefix=HSAIL %s + +; constantexpr ptrtoints to the wrong pointer size assert + +@int0 = internal unnamed_addr addrspace(2) constant i32 9 +@lds.int0 = internal unnamed_addr addrspace(3) global i32 undef + +@ptr_to_int_gv_p3i32_ext = addrspace(2) constant i64 ptrtoint (i32 addrspace(3)* @lds.int0 to i64) +@ptr_to_int_gv_p2i32_trunc = addrspace(2) constant i32 ptrtoint (i32 addrspace(2)* @int0 to i32) + + Index: test/CodeGen/HSAIL/ptrtoint-constantexpr-initializer32.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ptrtoint-constantexpr-initializer32.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL32 -check-prefix=HSAIL %s + +; HSAIL-DAG: prog readonly_u32 &ptr_to_int_gv_p3i32 = 0; +; HSAIL-DAG: pragma "initvarwithaddress:&ptr_to_int_gv_p3i32:0:4:%lds.int0:0"; + +; HSAIL32-DAG: prog readonly_u32 &ptr_to_int_gv_p2i32 = 0; +; HSAIL32-DAG: pragma "initvarwithaddress:&ptr_to_int_gv_p2i32:0:4:&int0:0"; + +@int0 = internal unnamed_addr addrspace(2) constant i32 9 +@lds.int0 = internal unnamed_addr addrspace(3) global i32 undef + +@ptr_to_int_gv_p3i32 = addrspace(2) constant i32 ptrtoint (i32 addrspace(3)* @lds.int0 to i32) +@ptr_to_int_gv_p2i32 = addrspace(2) constant i32 ptrtoint (i32 addrspace(2)* @int0 to i32) \ No newline at end of file Index: test/CodeGen/HSAIL/ptrtoint-constantexpr-initializer64.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/ptrtoint-constantexpr-initializer64.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=hsail64 -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL64 -check-prefix=HSAIL %s + +; HSAIL-DAG: prog readonly_u32 &ptr_to_int_gv_p3i32 = 0; +; HSAIL-DAG: pragma "initvarwithaddress:&ptr_to_int_gv_p3i32:0:4:%lds.int0:0"; + +; HSAIL64-DAG: prog readonly_u64 &ptr_to_int_gv_p2i32 = 0; +; HSAIL64-DAG: pragma "initvarwithaddress:&ptr_to_int_gv_p2i32:0:8:&int0:0"; + +@int0 = internal unnamed_addr addrspace(2) constant i32 9 +@lds.int0 = internal unnamed_addr addrspace(3) global i32 
undef + +@ptr_to_int_gv_p3i32 = addrspace(2) constant i32 ptrtoint (i32 addrspace(3)* @lds.int0 to i32) +@ptr_to_int_gv_p2i32 = addrspace(2) constant i64 ptrtoint (i32 addrspace(2)* @int0 to i64) \ No newline at end of file Index: test/CodeGen/HSAIL/reg-coalescer-assert.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/reg-coalescer-assert.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; This asserted in the register coalescer because it couldn't figure +; out the dest reg class from the mov instruction definition. + +; HSAIL-LABEL: {{^}}prog function &foo( +; HSAIL: mov_b64 {{\$d[0-9]+}}, 0; +; HSAIL: mov_b64 {{\$d[0-9]+}}, 0; +; HSAIL-NOT: mov +; HSAIL: @BB0_1: +; HSAIL-NOT: mov +; HSAIL: br +define void @foo() #0 { +bb: + br label %bb1 + +bb1: + %tmp = phi i64 [ 0, %bb ], [ %tmp5, %bb1 ] + %tmp2 = mul nsw i64 %tmp, 6 + %tmp3 = insertelement <4 x i64> undef, i64 %tmp2, i32 1 + %tmp4 = mul nsw i64 %tmp, 15 + %tmp5 = add i64 %tmp, 1 + br label %bb1 +} + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/return-value.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/return-value.ll @@ -0,0 +1,257 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL: decl prog function &return_i32(arg_u32 %ret)(); +; HSAIL: decl prog function &return_v4i32(align(16) arg_u32 %ret[4])(); + +; HSAIL: decl prog function &return_arg_i1(arg_u8 %ret)(arg_u8 %x); +; HSAIL: decl prog function &return_sext_arg_i1(arg_s32 %ret)(arg_u8 %x); +; HSAIL: decl prog function &return_zext_arg_i1(arg_u32 %ret)(arg_u8 %x); + +; HSAIL: decl prog function &return_arg_i8(arg_u8 %ret)(arg_u8 %x); +; HSAIL: decl prog function &return_sext_arg_i8(arg_s32 %ret)(arg_u8 %x); +; HSAIL: decl prog function &return_zext_arg_i8(arg_u32 %ret)(arg_u8 %x); +; HSAIL: decl prog function &return_arg_i16(arg_u16 %ret)(arg_u16 %x); +; HSAIL: decl prog function &return_sext_arg_i16(arg_s32 %ret)(arg_u16 %x); +; HSAIL: decl prog function &return_zext_arg_i16(arg_u32 %ret)(arg_u16 %x); +; HSAIL: decl prog function &return_arg_i32(arg_u32 %ret)(arg_u32 %x); + + +; HSAIL: decl prog function &return_vector_arg_v1i32(arg_u32 %ret[1])(arg_u32 %x[1]); +; HSAIL: decl prog function &return_vector_arg_v2i32(align(8) arg_u32 %ret[2])(align(8) arg_u32 %x[2]); +; HSAIL: decl prog function &return_vector_arg_v3i32(align(16) arg_u32 %ret[4])(align(16) arg_u32 %x[4]); +; HSAIL: decl prog function &return_vector_arg_v4i32(align(16) arg_u32 %ret[4])(align(16) arg_u32 %x[4]); +; HSAIL: decl prog function &return_vector_arg_v8i32(align(32) arg_u32 %ret[8])(align(32) arg_u32 %x[8]); +; HSAIL: decl prog function &return_vector_arg_v16i32(align(64) arg_u32 %ret[16])(align(64) arg_u32 %x[16]); + +; HSAIL: decl prog function &return_arg_i64(arg_u64 %ret)(arg_u64 %x); +; HSAIL: decl prog function &return_vector_arg_v1i64(arg_u64 %ret[1])(arg_u64 %x[1]); +; HSAIL: decl prog function &return_vector_arg_v2i64(align(16) arg_u64 %ret[2])(align(16) arg_u64 %x[2]); +; HSAIL: decl prog function &return_vector_arg_v3i64(align(32) arg_u64 %ret[4])(align(32) arg_u64 %x[4]); +; HSAIL: decl prog function &return_vector_arg_v4i64(align(32) arg_u64 %ret[4])(align(32) arg_u64 %x[4]); +; HSAIL: decl prog function &return_vector_arg_v8i64(align(64) arg_u64 %ret[8])(align(64) arg_u64 %x[8]); +; HSAIL: decl prog function &return_vector_arg_v16i64(align(128) arg_u64 %ret[16])(align(128) arg_u64 %x[16]); + +; 
HSAIL: decl prog function &return_struct_arg(align(4) arg_u8 %ret[8])(align(4) arg_u8 %x[8]); + +; HSAIL: decl prog function &struct_global_byval_arg()( +; HSAIL-NEXT: arg_u32 %test_byval_arg, +; HSAIL-NEXT: align(4) arg_u8 %x[8]); + +; HSAIL: decl prog function &struct_private_byval_arg()( +; HSAIL-NEXT: arg_u32 %test_byval_arg, +; HSAIL-NEXT: align(4) arg_u8 %x[8]); + +; HSAIL: decl prog function &return_arg_array_i32(arg_u32 %ret[4])(arg_u32 %x[4]); + + +; HSAIL: prog function &return_i32(arg_u32 %return_i32)() +; HSAIL: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_i32]; +define i32 @return_i32() { + ret i32 123 +} + +; HSAIL: prog function &return_v4i32(align(16) arg_u32 %return_v4i32[4])() +; HSAIL-DAG: st_arg_align(16)_u32 {{\$s[0-9]+}}, [%return_v4i32]; +; HSAIL-DAG: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_v4i32][4]; +; HSAIL-DAG: st_arg_align(8)_u32 {{\$s[0-9]+}}, [%return_v4i32][8]; +; HSAIL-DAG: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_v4i32][12]; +define <4 x i32> @return_v4i32() { + ret <4 x i32> +} + +; HSAIL-LABEL: prog function &return_arg_i1(arg_u8 %return_arg_i1)(arg_u8 %x) +; HSAIL: ld_arg_u8 [[LDI1:\$s[0-9]+]], [%x]; +; HSAIL: and_b32 [[RESULT:\$s[0-9]+]], [[LDI1]], 1; +; HSAIL: st_arg_u8 [[RESULT]], [%return_arg_i1]; +define i1 @return_arg_i1(i1 %x) { + ret i1 %x +} + +; HSAIL-LABEL: prog function &return_sext_arg_i1(arg_s32 %return_sext_arg_i1)(arg_u8 %x) +; HSAIL: ld_arg_u8 [[LDI1:\$s[0-9]+]], [%x]; +; HSAIL: shl_u32 [[EXT0:\$s[0-9]+]], [[LDI1]], 31; +; HSAIL: shr_s32 [[RESULT:\$s[0-9]+]], [[EXT0]], 31; +; HSAIL: st_arg_u32 [[RESULT]], [%return_sext_arg_i1]; +define signext i1 @return_sext_arg_i1(i1 %x) { + ret i1 %x +} + +; HSAIL-LABEL: prog function &return_zext_arg_i1(arg_u32 %return_zext_arg_i1)(arg_u8 %x) +; HSAIL: ld_arg_u8 [[LDI1:\$s[0-9]+]], [%x]; +; HSAIL: and_b32 [[RESULT:\$s[0-9]+]], [[LDI1]], 1; +; HSAIL: st_arg_u32 [[RESULT]], [%return_zext_arg_i1]; +define zeroext i1 @return_zext_arg_i1(i1 %x) { + ret i1 %x +} + +; HSAIL-LABEL: prog function &return_arg_i8(arg_u8 %return_arg_i8)(arg_u8 %x) +; HSAIL: st_arg_u8 {{\$s[0-9]+}}, [%return_arg_i8]; +define i8 @return_arg_i8(i8 %x) { + ret i8 %x +} + +; HSAIL-LABEL: prog function &return_sext_arg_i8(arg_s32 %return_sext_arg_i8)(arg_u8 %x) +; HSAIL: st_arg_u8 {{\$s[0-9]+}}, [%return_sext_arg_i8]; +define signext i8 @return_sext_arg_i8(i8 %x) { + ret i8 %x +} + +; HSAIL: prog function &return_zext_arg_i8(arg_u32 %return_zext_arg_i8)(arg_u8 %x) +; HSAIL: st_arg_u8 {{\$s[0-9]+}}, [%return_zext_arg_i8]; +define zeroext i8 @return_zext_arg_i8(i8 %x) { + ret i8 %x +} + +; HSAIL: prog function &return_arg_i16(arg_u16 %return_arg_i16)(arg_u16 %x) +; HSAIL: st_arg_align(2)_u16 {{\$s[0-9]+}}, [%return_arg_i16]; +define i16 @return_arg_i16(i16 %x) { + ret i16 %x +} + +; HSAIL: prog function &return_sext_arg_i16(arg_s32 %return_sext_arg_i16)(arg_u16 %x) +; HSAIL: st_arg_align(2)_u16 {{\$s[0-9]+}}, [%return_sext_arg_i16]; +define signext i16 @return_sext_arg_i16(i16 %x) { + ret i16 %x +} + +; HSAIL: prog function &return_zext_arg_i16(arg_u32 %return_zext_arg_i16)(arg_u16 %x) +; HSAIL: st_arg_align(2)_u16 {{\$s[0-9]+}}, [%return_zext_arg_i16]; +define zeroext i16 @return_zext_arg_i16(i16 %x) { + ret i16 %x +} + +; HSAIL: prog function &return_arg_i32(arg_u32 %return_arg_i32)(arg_u32 %x) +; HSAIL: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_arg_i32]; +define i32 @return_arg_i32(i32 %x) { + ret i32 %x +} + +; HSAIL: prog function &return_vector_arg_v1i32(arg_u32 %return_vector_arg_v1i32[1])(arg_u32 %x[1]) +define <1 x i32> 
@return_vector_arg_v1i32(<1 x i32> %x) { + ret <1 x i32> %x +} + +; HSAIL: prog function &return_vector_arg_v2i32(align(8) arg_u32 %return_vector_arg_v2i32[2])(align(8) arg_u32 %x[2]) +; HSAIL: st_arg_align(8)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v2i32]; +; HSAIL: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v2i32][4]; +define <2 x i32> @return_vector_arg_v2i32(<2 x i32> %x) { + ret <2 x i32> %x +} + +; HSAIL: prog function &return_vector_arg_v3i32(align(16) arg_u32 %return_vector_arg_v3i32[4])(align(16) arg_u32 %x[4]) +; HSAIL: st_arg_align(16)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v3i32]; +; HSAIL: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v3i32][4]; +; HSAIL: st_arg_align(8)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v3i32][8]; +; HSAIL-NOT: st +; HSAIL: ret; +define <3 x i32> @return_vector_arg_v3i32(<3 x i32> %x) { + ret <3 x i32> %x +} + +; HSAIL: prog function &return_vector_arg_v4i32(align(16) arg_u32 %return_vector_arg_v4i32[4])(align(16) arg_u32 %x[4]) +; HSAIL: st_arg_align(16)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v4i32]; +; HSAIL: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v4i32][4]; +; HSAIL: st_arg_align(8)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v4i32][8]; +; HSAIL: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_vector_arg_v4i32][12]; +; HSAIL: ret; +define <4 x i32> @return_vector_arg_v4i32(<4 x i32> %x) { + ret <4 x i32> %x +} + +; HSAIL: prog function &return_vector_arg_v8i32(align(32) arg_u32 %return_vector_arg_v8i32[8])(align(32) arg_u32 %x[8]) +define <8 x i32> @return_vector_arg_v8i32(<8 x i32> %x) { + ret <8 x i32> %x +} + +; HSAIL: prog function &return_vector_arg_v16i32(align(64) arg_u32 %return_vector_arg_v16i32[16])(align(64) arg_u32 %x[16]) +define <16 x i32> @return_vector_arg_v16i32(<16 x i32> %x) { + ret <16 x i32> %x +} + +; HSAIL: prog function &return_arg_i64(arg_u64 %return_arg_i64)(arg_u64 %x) +; HSAIL: st_arg_align(8)_u64 {{\$d[0-9]+}}, [%return_arg_i64]; +define i64 @return_arg_i64(i64 %x) { + ret i64 %x +} + +; HSAIL: prog function &return_vector_arg_v1i64(arg_u64 %return_vector_arg_v1i64[1])(arg_u64 %x[1]) +define <1 x i64> @return_vector_arg_v1i64(<1 x i64> %x) { + ret <1 x i64> %x +} + +; HSAIL: prog function &return_vector_arg_v2i64(align(16) arg_u64 %return_vector_arg_v2i64[2])(align(16) arg_u64 %x[2]) +; HSAIL: st_arg_align(16)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v2i64]; +; HSAIL: st_arg_align(8)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v2i64][8]; +define <2 x i64> @return_vector_arg_v2i64(<2 x i64> %x) { + ret <2 x i64> %x +} + +; HSAIL: prog function &return_vector_arg_v3i64(align(32) arg_u64 %return_vector_arg_v3i64[4])(align(32) arg_u64 %x[4]) +; HSAIL: st_arg_align(32)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v3i64]; +; HSAIL: st_arg_align(8)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v3i64][8]; +; HSAIL: st_arg_align(16)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v3i64][16]; +; HSAIL-NOT: st +; HSAIL: ret; +define <3 x i64> @return_vector_arg_v3i64(<3 x i64> %x) { + ret <3 x i64> %x +} + +; HSAIL: prog function &return_vector_arg_v4i64(align(32) arg_u64 %return_vector_arg_v4i64[4])(align(32) arg_u64 %x[4]) +; HSAIL: st_arg_align(32)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v4i64]; +; HSAIL: st_arg_align(8)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v4i64][8]; +; HSAIL: st_arg_align(16)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v4i64][16]; +; HSAIL: st_arg_align(8)_u64 {{\$d[0-9]+}}, [%return_vector_arg_v4i64][24]; +; HSAIL: ret; +define <4 x i64> @return_vector_arg_v4i64(<4 x i64> %x) { + ret <4 x i64> %x +} + +; HSAIL: prog 
function &return_vector_arg_v8i64(align(64) arg_u64 %return_vector_arg_v8i64[8])(align(64) arg_u64 %x[8]) +define <8 x i64> @return_vector_arg_v8i64(<8 x i64> %x) { + ret <8 x i64> %x +} + +; HSAIL: prog function &return_vector_arg_v16i64(align(128) arg_u64 %return_vector_arg_v16i64[16])(align(128) arg_u64 %x[16]) +define <16 x i64> @return_vector_arg_v16i64(<16 x i64> %x) { + ret <16 x i64> %x +} +%struct.i32pair = type { i32, i32 } + +; HSAIL: prog function &return_struct_arg(align(4) arg_u8 %return_struct_arg[8])(align(4) arg_u8 %x[8]) +; HSAIL-DAG: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_struct_arg][4]; +; HSAIL-DAG: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_struct_arg]; +define %struct.i32pair @return_struct_arg(%struct.i32pair %x) { + ret %struct.i32pair %x +} + +; HSAIL: prog function &struct_global_byval_arg()( +; HSAIL-NEXT: arg_u32 %test_byval_arg, +; HSAIL-NEXT: align(4) arg_u8 %x[8]) + +; HSAIL-DAG: st_global_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+4] +; HSAIL-DAG: st_global_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}] +define void @struct_global_byval_arg(%struct.i32pair addrspace(1)* sret %test_byval_arg, %struct.i32pair %x) { + store %struct.i32pair %x, %struct.i32pair addrspace(1)* %test_byval_arg + ret void +} + +; HSAIL: prog function &struct_private_byval_arg()( +; HSAIL-NEXT: arg_u32 %test_byval_arg, +; HSAIL-NEXT: align(4) arg_u8 %x[8]) + +; HSAIL-DAG: st_private_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+4] +; HSAIL-DAG: st_private_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}] +define void @struct_private_byval_arg(%struct.i32pair* sret %test_byval_arg, %struct.i32pair %x) { + store %struct.i32pair %x, %struct.i32pair* %test_byval_arg + ret void +} + +; HSAIL: prog function &return_arg_array_i32(arg_u32 %return_arg_array_i32[4])(arg_u32 %x[4]) +; HSAIL-DAG: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_arg_array_i32]; +; HSAIL-DAG: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_arg_array_i32][4]; +; HSAIL-DAG: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_arg_array_i32][8]; +; HSAIL-DAG: st_arg_align(4)_u32 {{\$s[0-9]+}}, [%return_arg_array_i32][12]; +; HSAIL: ret; +define [4 x i32] @return_arg_array_i32([4 x i32] %x) { + ret [4 x i32] %x +} Index: test/CodeGen/HSAIL/reverse-branch-condition.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/reverse-branch-condition.ll @@ -0,0 +1,481 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_eq( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_ne_b1_s32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_eq(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp eq i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_ne( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_eq_b1_s32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; 
+define void @test_inv_branch_i32_ne(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp ne i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_ugt( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_le_b1_u32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_ugt(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp ugt i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_uge( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_lt_b1_u32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_uge(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp uge i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_ult( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_ge_b1_u32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_ult(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp ult i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_ule( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_gt_b1_u32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_ule(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp ule i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_sgt( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_le_b1_s32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_sgt(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp sgt i32 %val0, 
%val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_sge( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_lt_b1_s32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_sge(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp sge i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_slt( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_ge_b1_s32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_slt(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp slt i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_i32_sle( +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL0:\$s[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[VAL1:\$s[0-9]+]], [%val1]; +; HSAIL: cmp_gt_b1_s32 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_i32_sle(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val0, i32 %val1) nounwind { + %cmp = icmp sle i32 %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_oeq( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_neu_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_oeq(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp oeq double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_ogt( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_leu_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_ogt(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp ogt double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: 
{{^}}prog function &test_inv_branch_f64_oge( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_ltu_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_oge(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp oge double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_olt( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_geu_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_olt(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp olt double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_ole( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_gtu_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_ole(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp ole double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_one( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_equ_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_one(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp one double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_ord( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_nan_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_ord(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp ord double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_ueq( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; 
HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_ne_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_ueq(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp ueq double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_ugt( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_le_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_ugt(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp ugt double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_uge( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_lt_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_uge(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp uge double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_ult( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_ge_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_ult(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp ult double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_ule( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_gt_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]]; +; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2; +; HSAIL: st_global_align(4)_u32 222 +; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}} +; HSAIL: ret; +define void @test_inv_branch_f64_ule(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind { + %cmp = fcmp ule double %val0, %val1 + br i1 %cmp, label %store, label %end + +store: + store i32 222, i32 addrspace(1)* %out + ret void + +end: + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_une( +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0]; +; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1]; +; HSAIL: cmp_eq_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], 
[[VAL1]];
+; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2;
+; HSAIL: st_global_align(4)_u32 222
+; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}}
+; HSAIL: ret;
+define void @test_inv_branch_f64_une(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind {
+  %cmp = fcmp une double %val0, %val1
+  br i1 %cmp, label %store, label %end
+
+store:
+  store i32 222, i32 addrspace(1)* %out
+  ret void
+
+end:
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &test_inv_branch_f64_uno(
+; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL0:\$d[0-9]+]], [%val0];
+; HSAIL-DAG: ld_arg_align(8)_f64 [[VAL1:\$d[0-9]+]], [%val1];
+; HSAIL: cmp_num_b1_f64 [[CMP:\$c[0-9]+]], [[VAL0]], [[VAL1]];
+; HSAIL: cbr_b1 [[CMP]], @BB[[FNNUM:[0-9]+]]_2;
+; HSAIL: st_global_align(4)_u32 222
+; HSAIL: {{^@BB}}[[FNNUM]]{{_2:}}
+; HSAIL: ret;
+define void @test_inv_branch_f64_uno(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, double %val0, double %val1) nounwind {
+  %cmp = fcmp uno double %val0, %val1
+  br i1 %cmp, label %store, label %end
+
+store:
+  store i32 222, i32 addrspace(1)* %out
+  ret void
+
+end:
+  ret void
+}
Index: test/CodeGen/HSAIL/rotl.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/rotl.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -march=hsail -filetype=asm | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}prog function &rotl_i32
+; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+entry:
+  %0 = shl i32 %x, %y
+  %1 = sub i32 32, %y
+  %2 = lshr i32 %x, %1
+  %3 = or i32 %0, %2
+  store i32 %3, i32 addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &rotl_v2i32
+; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+entry:
+  %0 = shl <2 x i32> %x, %y
+  %1 = sub <2 x i32> <i32 32, i32 32>, %y
+  %2 = lshr <2 x i32> %x, %1
+  %3 = or <2 x i32> %0, %2
+  store <2 x i32> %3, <2 x i32> addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &rotl_v4i32
+; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: neg_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+entry:
+  %0 = shl <4 x i32> %x, %y
+  %1 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
+  %2 = lshr <4 x i32> %x, %1
+  %3 = or <4 x i32> %0, %2
+  store <4 x i32> %3, <4 x i32> addrspace(1)* %in
+  ret void
+}
Index: test/CodeGen/HSAIL/rotr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/rotr.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=hsail | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}prog function &rotr_i32
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
+  %tmp0 = sub i32 32, %y
+  %tmp1 = shl i32 %x, %tmp0
+  %tmp2 = lshr i32 %x, %y
+  %tmp3 = or i32 %tmp1, %tmp2
+  store i32 %tmp3, i32 addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &rotr_v2i32
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
+  %tmp0 = sub <2 x i32> <i32 32, i32 32>, %y
+  %tmp1 = shl <2 x i32> %x, %tmp0
+  %tmp2 = lshr <2 x i32> %x, %y
+  %tmp3 = or <2 x i32> %tmp1, %tmp2
+  store <2 x i32> %tmp3, <2 x i32> addrspace(1)* %in
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &rotr_v4i32
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: bitalign_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
+  %tmp0 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
+  %tmp1 = shl <4 x i32> %x, %tmp0
+  %tmp2 = lshr <4 x i32> %x, %y
+  %tmp3 = or <4 x i32> %tmp1, %tmp2
+  store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %in
+  ret void
+}
Index: test/CodeGen/HSAIL/rsqrt.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/rsqrt.ll
@@ -0,0 +1,23 @@
+; XFAIL: *
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s
+
+declare float @llvm.sqrt.f32(float) #0
+declare double @llvm.sqrt.f64(double) #0
+
+; HSAIL-LABEL: {{^}}prog function &test_rsqrt_f32(
+; HSAIL: nsqrt_f32 {{\$s[0-9]+}}, {{\$s[0-9]+}}
+define float @test_rsqrt_f32(float %x) #0 {
+  %sqrt = call float @llvm.sqrt.f32(float %x) #0
+  %rsqrt = fdiv float 1.0, %sqrt
+  ret float %rsqrt
+}
+
+; HSAIL-LABEL: {{^}}prog function &test_rsqrt_f64(
+; HSAIL: nsqrt_f64 {{\$s[0-9]+}}, {{\$s[0-9]+}}
+define double @test_rsqrt_f64(double %x) #0 {
+  %sqrt = call double @llvm.sqrt.f64(double %x) #0
+  %rsqrt = fdiv double 1.0, %sqrt
+  ret double %rsqrt
+}
+
+attributes #0 = { nounwind readnone }
Index: test/CodeGen/HSAIL/same-reg-vect.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/same-reg-vect.ll
@@ -0,0 +1,18 @@
+; XFAIL: *
+; Loads can be combined even if their memory operands are different, as long as the base registers are the same.
+; In this example the two loads have memory operands based on %a and %1 but can still be combined.
+;
+; RUN: llc < %s -march=hsail -filetype=asm | FileCheck %s
+; CHECK: ld_v2
+
+define spir_kernel void @func1(i8 addrspace(4)* nocapture %p, i32 addrspace(1)* nocapture %out) nounwind {
+entry:
+  %a = bitcast i8 addrspace(4)* %p to i32 addrspace(4)*
+  %0 = load i32, i32 addrspace(4)* %a, align 4
+  %b = getelementptr inbounds i8, i8 addrspace(4)* %p, i32 4
+  %1 = bitcast i8 addrspace(4)* %b to i32 addrspace(4)*
+  %2 = load i32, i32 addrspace(4)* %1, align 4
+  %add = add i32 %2, %0
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
Index: test/CodeGen/HSAIL/sdiv.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/sdiv.ll
@@ -0,0 +1,98 @@
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}prog function &sdiv_i32
+; HSAIL: div_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+  %num = load i32, i32 addrspace(1) * %in
+  %den = load i32, i32 addrspace(1) * %den_ptr
+  %result = sdiv i32 %num, %den
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &sdiv_i32_4
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 31;
+; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 30;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 2;
+define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %num = load i32, i32 addrspace(1) * %in
+  %result = sdiv i32 %num, 4
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &slow_sdiv_i32_3435
+; XHSAIL: mulhi_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, -1734241525;
+; XHSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; XHSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 31;
+; XHSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 11;
+; XHSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %num = load i32, i32 addrspace(1) * %in
+  %result = sdiv i32 %num, 3435
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &sdiv_v2i32
+; HSAIL: div_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: div_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
+  %result = sdiv <2 x i32> %num, %den
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &sdiv_v2i32_4
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 31;
+; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 30;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 2;
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 31;
+; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 30;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 2;
+define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %result = sdiv <2 x i32> %num, <i32 4, i32 4>
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &sdiv_v4i32
+; HSAIL: div_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: div_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: div_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: div_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
+  %result = sdiv <4 x i32> %num, %den
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &sdiv_v4i32_4
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 31;
+; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 30;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 31;
+; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 30;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 2;
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 31;
+; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 30;
+; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 2;
+define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
Index: test/CodeGen/HSAIL/select-i1.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/select-i1.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}prog function &select_i1
+; HSAIL-DAG: ld_arg_align(4)_u32 [[COND:\$s[0-9]+]], [%cond];
+; HSAIL-DAG: ld_arg_u8 [[A:\$s[0-9]+]], [%a];
+; HSAIL-DAG: ld_arg_u8 [[B:\$s[0-9]+]], [%b];
+
+; HSAIL-DAG: cvt_b1_u32 [[CVTA:\$c[0-9]+]], [[A]];
+; HSAIL-DAG: cvt_b1_u32 [[CVTB:\$c[0-9]+]], [[B]];
+; HSAIL-DAG: cmp_gt_b1_u32 [[CMP:\$c[0-9]+]], [[COND]], 5;
+
+; HSAIL: cmov_b1 [[RESULT:\$c[0-9]+]], [[CMP]], [[CVTA]], [[CVTB]];
+; HSAIL: cvt_s32_b1 [[CVTRESULT:\$s[0-9]+]], [[RESULT]];
+; HSAIL: st_global_align(4)_u8 [[CVTRESULT]]
+define void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind {
+  %cmp = icmp ugt i32 %cond, 5
+  %sel = select i1 %cmp, i1 %a, i1 %b
+  store i1 %sel, i1 addrspace(1)* %out, align 4
+  ret void
+}
Index: test/CodeGen/HSAIL/selectcc-cnd.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/selectcc-cnd.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}prog function &test
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in];
+; HSAIL: cmp_eq_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000;
+; HSAIL-NEXT: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 1065353216, 1073741824;
+; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out];
+define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %1 = load float, float addrspace(1)* %in
+  %2 = fcmp oeq float %1, 0.0
+  %3 = select i1 %2, float 1.0, float 2.0
+  store float %3, float addrspace(1)* %out
+  ret void
+}
Index: test/CodeGen/HSAIL/selectcc-cnde-int.ll
===================================================================
--- /dev/null
+++ 
test/CodeGen/HSAIL/selectcc-cnde-int.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%in]; +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 1, 2; +; HSAIL: ld_arg_align(4)_u32 {{\$s[0-9]+}}, [%out]; +define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %1 = load i32, i32 addrspace(1)* %in + %2 = icmp eq i32 %1, 0 + %3 = select i1 %2, i32 1, i32 2 + store i32 %3, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/setcc-i1-assert.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/setcc-i1-assert.ll @@ -0,0 +1,24 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; FIXME: cmp_ne_b1_b1 +; HSAIL-LABEL: {{^}}prog function &setcc_i1_branch( +; HSAIL-DAG: cmp_gt_b1_s32 [[C0:\$c[0-9]+]], {{\$s[0-9]+}}, 0; +; HSAIL-DAG: cmp_lt_b1_s32 [[C1:\$c[0-9]+]], {{\$s[0-9]+}}, 0; +; HSAIL: and_b1 [[AND:\$c[0-9]+]], [[C0]], [[C1]]; +; HSAIL: cmp_ne_b1_b1 {{\$c[0-9]+}}, [[AND]], 1; +; HSAIL: ret +define void @setcc_i1_branch(i32 %arg, i32 %arg1) #0 { +bb: + %tmp = icmp sgt i32 %arg1, 0 + %tmp2 = icmp slt i32 %arg, 0 + %tmp3 = and i1 %tmp, %tmp2 + br i1 %tmp3, label %bb4, label %bb8 + +bb4: ; preds = %bb + ret void + +bb8: ; preds = %bb6 + ret void +} + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/setcc.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/setcc.ll @@ -0,0 +1,359 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &setcc_v2i32 +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) { + %result = icmp eq <2 x i32> %a, %b + %sext = sext <2 x i1> %result to <2 x i32> + store <2 x i32> %sext, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &setcc_v4i32 +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1) * %in + %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr + %result = icmp eq <4 x i32> %a, %b + %sext = sext <4 x i1> %result to <4 x i32> + store <4 x i32> %sext, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_oeq +; HSAIL: cmp_eq_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp oeq 
float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_ogt +; HSAIL: cmp_gt_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp ogt float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_oge +; HSAIL: cmp_ge_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp oge float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_olt +; HSAIL: cmp_lt_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp olt float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_ole +; HSAIL: cmp_le_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp ole float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_one +; HSAIL: cmp_ne_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp one float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_ord +; HSAIL: cmp_num_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp ord float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_ueq +; HSAIL: cmp_equ_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; + + +define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp ueq float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_ugt +; HSAIL: cmp_gtu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; + +define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp ugt float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_uge +; HSAIL: cmp_geu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; + +define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp uge float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_ult +; HSAIL: cmp_ltu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; 
HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp ult float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_ule +; HSAIL: cmp_leu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; + +define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp ule float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_une +; HSAIL: cmp_neu_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; + +define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp une float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &f32_uno +; HSAIL: cmp_nan_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; + +define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) { + %tmp0 = fcmp uno float %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_eq +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp eq i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_ne +; HSAIL: cmp_ne_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp ne i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_ugt +; HSAIL: cmp_gt_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp ugt i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_uge +; HSAIL: cmp_ge_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp uge i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_ult +; HSAIL: cmp_lt_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp ult i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_ule +; HSAIL: cmp_le_b1_u32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp ule i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function 
&i32_sgt +; HSAIL: cmp_gt_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp sgt i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_sge +; HSAIL: cmp_ge_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp sge i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_slt +; HSAIL: cmp_lt_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp slt i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i32_sle +; HSAIL: cmp_le_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, 0; +define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %tmp0 = icmp sle i32 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_eq( +define void @i1_eq(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp eq i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_ne( +define void @i1_ne(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp ne i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_ugt( +define void @i1_ugt(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp ugt i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_uge( +define void @i1_uge(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp uge i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_ult( +define void @i1_ult(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp ult i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_ule( +define void @i1_ule(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp ule i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_sgt( +define void @i1_sgt(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp sgt i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_sge( +define void @i1_sge(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp sge i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_slt( +define void @i1_slt(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp slt i1 %a, %b + %tmp1 = sext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &i1_sle( +define void @i1_sle(i32 addrspace(1)* %out, i1 %a, i1 %b) { + %tmp0 = icmp sle i1 %a, %b + %tmp1 = sext i1 %tmp0 to 
i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/shift64.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/shift64.ll @@ -0,0 +1,79 @@ +; RUN: llc -march=hsail < %s | FileCheck %s + +; CHECK-LABEL: {{^}}prog function &shiftl64_ir( +; CHECK: {{shl_u64 \$d[0-9]+, 9756277979052589857, \$s[0-9]+;}} +define void @shiftl64_ir(i64 addrspace(1)* nocapture %out, i64 %amt) nounwind { + %tmp2 = and i64 %amt, 63 + %tmp3 = shl i64 -8690466094656961759, %tmp2 + store i64 %tmp3, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}prog function &shiftl64_ri( +; CHECK: {{shl_u64 \$d[0-9]+, \$d[0-9], 1;}} +define void @shiftl64_ri(i64 addrspace(1)* nocapture %out, i64 %val) nounwind { + %tmp2 = shl i64 %val, 1 + store i64 %tmp2, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}prog function &shiftl64_rr( +; CHECK: {{shl_u64 \$d[0-9]+, \$d[0-9], \$s[0-9]+;}} +define void @shiftl64_rr(i64 addrspace(1)* nocapture %out, i64 %val, i64 %amt) nounwind { + %tmp3 = and i64 %amt, 63 + %tmp4 = shl i64 %val, %tmp3 + store i64 %tmp4, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}prog function &shiftr64s_ir( +; CHECK: {{shr_s64 \$d[0-9]+, -8690466094656961759, \$s[0-9]+;}} +define void @shiftr64s_ir(i64 addrspace(1)* nocapture %out, i64 %amt) nounwind { + %tmp2 = and i64 %amt, 63 + %tmp3 = ashr i64 -8690466094656961759, %tmp2 + store i64 %tmp3, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}prog function &shiftr64s_ri( +; CHECK: {{shr_s64 \$d[0-9]+, \$d[0-9], 1;}} +define void @shiftr64s_ri(i64 addrspace(1)* nocapture %out, i64 %val) nounwind { + %tmp2 = ashr i64 %val, 1 + store i64 %tmp2, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}prog function &shiftr64s_rr( +; CHECK: {{shr_s64 \$d[0-9]+, \$d[0-9], \$s[0-9]+;}} +define void @shiftr64s_rr(i64 addrspace(1)* nocapture %out, i64 %val, i64 %amt) nounwind { + %tmp3 = and i64 %amt, 63 + %tmp4 = ashr i64 %val, %tmp3 + store i64 %tmp4, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}prog function &shiftr64u_ir( +; CHECK: {{shr_u64 \$d[0-9]+, 9756277979052589857, \$s[0-9]+;}} +define void @shiftr64u_ir(i64 addrspace(1)* nocapture %out, i64 %amt) nounwind { + %tmp2 = and i64 %amt, 63 + %tmp3 = lshr i64 -8690466094656961759, %tmp2 + store i64 %tmp3, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}prog function &shiftr64u_ri( +; CHECK: {{shr_u64 \$d[0-9]+, \$d[0-9], 1;}} +define void @shiftr64u_ri(i64 addrspace(1)* nocapture %out, i64 %val) nounwind { + %tmp2 = lshr i64 %val, 1 + store i64 %tmp2, i64 addrspace(1)* %out, align 8 + ret void +} + +; CHECK-LABEL: {{^}}prog function &shiftr64u_rr( +; CHECK: {{shr_u64 \$d[0-9]+, \$d[0-9], \$s[0-9]+;}} +define void @shiftr64u_rr(i64 addrspace(1)* nocapture %out, i64 %val, i64 %amt) nounwind { + %tmp3 = and i64 %amt, 63 + %tmp4 = lshr i64 %val, %tmp3 + store i64 %tmp4, i64 addrspace(1)* %out, align 8 + ret void +} Index: test/CodeGen/HSAIL/sint_to_fp.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/sint_to_fp.ll @@ -0,0 +1,68 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &s_sint_to_fp_i32_to_f32( +; HSAIL: cvt_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) { + %result = sitofp i32 %in to float + store float %result, 
float addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &sint_to_fp_v2i32( +; HSAIL: cvt_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { + %result = sitofp <2 x i32> %in to <2 x float> + store <2 x float> %result, <2 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &sint_to_fp_v4i32( +; HSAIL: cvt_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32>, <4 x i32> addrspace(1) * %in + %result = sitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &sint_to_fp_i1_f32( +; HSAIL: cmp_eq_b1_s32 [[CMP:\$c[0-9]+]] +; HSAIL: cmov_b32 [[RESULT:\$s[0-9]+]], [[CMP]], 3212836864, 0; +; HSAIL: st_global_align(4)_u32 [[RESULT]] +define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = sitofp i1 %cmp to float + store float %fp, float addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &sint_to_fp_i32_to_f64( +; HSAIL: cvt_f64_s32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { + %result = sitofp i32 %in to double + store double %result, double addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &sint_to_fp_i1_f64( +; HSAIL: cmp_eq_b1_s32 [[CMP:\$c[0-9]+]] +; HSAIL: cmov_b64 [[RESULT:\$d[0-9]+]], [[CMP]], 13830554455654793216, 0; +; HSAIL: st_global_align(8)_u64 [[RESULT]] +define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = sitofp i1 %cmp to double + store double %fp, double addrspace(1)* %out, align 8 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &s_sint_to_fp_i64_to_f64( +; HSAIL: cvt_f64_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { + %result = sitofp i64 %in to double + store double %result, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/spill-cond-reg-with-scavenging.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/spill-cond-reg-with-scavenging.ll @@ -0,0 +1,458 @@ +; RUN: llc -O0 -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +declare void @llvm.HSAIL.barrier() #2 + +; Spill crs and have high enough s register pressure so that the +; register scavenger needs to create a new stack slot to handle the +; temporary registers created during the spill_b1/restore_b1 +; expansion. 
+ +; HSAIL-LABEL: {{^}}prog function &test_spill_cond_reg_with_scavenging()( +; HSAIL: align(4) spill_u8 %__spillStack[164]; +; HSAIL: {{^[ \t]}}spill_u32 %___spillScavenge; + +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][4]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][8]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][12]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][16]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][20]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][24]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][28]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][32]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][36]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][40]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][44]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][48]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][52]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][56]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][60]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][64]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][68]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][72]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][76]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][80]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][84]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][88]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][92]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][96]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][100]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][104]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][108]; +; HSAIL: ld_spill_align(4)_u32 $s0, [%__spillStack]; + +; HSAIL: cmp_eq_b1_s32 $c0, +; HSAIL-NEXT: cmp_eq_b1_s32 $c1, +; HSAIL-NEXT: cmp_eq_b1_s32 $c2, +; HSAIL-NEXT: cmp_eq_b1_s32 $c3, +; HSAIL-NEXT: cmp_eq_b1_s32 $c4, +; HSAIL-NEXT: cmp_eq_b1_s32 $c5, +; HSAIL-NEXT: cmp_eq_b1_s32 $c6, +; HSAIL-NEXT: cmp_eq_b1_s32 $c7, + +; $s0 is spilled and 
restored to scavenged slot for conversion +; HSAIL-NEXT: st_spill_align(4)_u32 $s0, [%___spillScavenge]; +; HSAIL-NEXT: cvt_u32_b1 $s0, $c0; +; HSAIL: barrier; +; HSAIL: ld_spill_align(4)_u32 $s0, [%___spillScavenge]; + + +; Make sure we use the last slots +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][156]; +; HSAIL: st_spill_align(4)_u32 $s0, [%__spillStack][160]; + +define void @test_spill_cond_reg_with_scavenging(i1 addrspace(1)* %out, i32 addrspace(1)* %in, i32 addrspace(1)* %s) #0 { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %c_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 2 + %d_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 3 + %e_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 4 + %a = load volatile i32, i32 addrspace(1)* %in + %b = load volatile i32, i32 addrspace(1)* %b_ptr + %c = load volatile i32, i32 addrspace(1)* %c_ptr + %d = load volatile i32, i32 addrspace(1)* %d_ptr + %e = load volatile i32, i32 addrspace(1)* %e_ptr + +; Create lots of s register pressure. + %s0_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 0 + %s1_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 1 + %s2_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 2 + %s3_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 3 + %s4_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 4 + %s5_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 5 + %s6_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 6 + %s7_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 7 + %s8_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 8 + %s9_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 9 + %s10_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 10 + %s11_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 11 + %s12_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 12 + %s13_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 13 + %s14_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 14 + %s15_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 15 + %s16_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 16 + %s17_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 17 + %s18_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 18 + %s19_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 19 + %s20_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 20 + %s21_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 21 + %s22_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 22 + %s23_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 23 + %s24_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 24 + %s25_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 25 + %s26_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 26 + %s27_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 27 + %s28_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 28 + %s29_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 29 + %s30_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 30 + %s31_ptr = getelementptr i32, i32 addrspace(1)* %s, i32 31 + + %s0 = load volatile i32, i32 addrspace(1)* %s0_ptr + %s1 = load volatile i32, i32 addrspace(1)* %s1_ptr + %s2 = load volatile i32, i32 addrspace(1)* %s2_ptr + %s3 = load volatile i32, i32 addrspace(1)* %s3_ptr + %s4 = load volatile i32, i32 addrspace(1)* %s4_ptr + %s5 = load volatile i32, i32 addrspace(1)* %s5_ptr + %s6 = load volatile i32, i32 addrspace(1)* %s6_ptr + %s7 = load volatile i32, i32 addrspace(1)* %s7_ptr + %s8 = load volatile i32, i32 addrspace(1)* %s8_ptr + %s9 = load volatile i32, i32 addrspace(1)* %s9_ptr + %s10 = load volatile i32, i32 addrspace(1)* %s10_ptr + %s11 
= load volatile i32, i32 addrspace(1)* %s11_ptr + %s12 = load volatile i32, i32 addrspace(1)* %s12_ptr + %s13 = load volatile i32, i32 addrspace(1)* %s13_ptr + %s14 = load volatile i32, i32 addrspace(1)* %s14_ptr + %s15 = load volatile i32, i32 addrspace(1)* %s15_ptr + %s16 = load volatile i32, i32 addrspace(1)* %s16_ptr + %s17 = load volatile i32, i32 addrspace(1)* %s17_ptr + %s18 = load volatile i32, i32 addrspace(1)* %s18_ptr + %s19 = load volatile i32, i32 addrspace(1)* %s19_ptr + %s20 = load volatile i32, i32 addrspace(1)* %s20_ptr + %s21 = load volatile i32, i32 addrspace(1)* %s21_ptr + %s22 = load volatile i32, i32 addrspace(1)* %s22_ptr + %s23 = load volatile i32, i32 addrspace(1)* %s23_ptr + %s24 = load volatile i32, i32 addrspace(1)* %s24_ptr + %s25 = load volatile i32, i32 addrspace(1)* %s25_ptr + %s26 = load volatile i32, i32 addrspace(1)* %s26_ptr + %s27 = load volatile i32, i32 addrspace(1)* %s27_ptr + %s28 = load volatile i32, i32 addrspace(1)* %s28_ptr + %s29 = load volatile i32, i32 addrspace(1)* %s29_ptr + %s30 = load volatile i32, i32 addrspace(1)* %s30_ptr + %s31 = load volatile i32, i32 addrspace(1)* %s31_ptr + + %s32 = load volatile i32, i32 addrspace(1)* %s + %s33 = load volatile i32, i32 addrspace(1)* %s + %s34 = load volatile i32, i32 addrspace(1)* %s + %s35 = load volatile i32, i32 addrspace(1)* %s + %s36 = load volatile i32, i32 addrspace(1)* %s + %s37 = load volatile i32, i32 addrspace(1)* %s + %s38 = load volatile i32, i32 addrspace(1)* %s + %s39 = load volatile i32, i32 addrspace(1)* %s + %s40 = load volatile i32, i32 addrspace(1)* %s + %s41 = load volatile i32, i32 addrspace(1)* %s + %s42 = load volatile i32, i32 addrspace(1)* %s + %s43 = load volatile i32, i32 addrspace(1)* %s + %s44 = load volatile i32, i32 addrspace(1)* %s + %s45 = load volatile i32, i32 addrspace(1)* %s + %s46 = load volatile i32, i32 addrspace(1)* %s + %s47 = load volatile i32, i32 addrspace(1)* %s + %s48 = load volatile i32, i32 addrspace(1)* %s + %s49 = load volatile i32, i32 addrspace(1)* %s + %s50 = load volatile i32, i32 addrspace(1)* %s + %s51 = load volatile i32, i32 addrspace(1)* %s + %s52 = load volatile i32, i32 addrspace(1)* %s + %s53 = load volatile i32, i32 addrspace(1)* %s + %s54 = load volatile i32, i32 addrspace(1)* %s + %s55 = load volatile i32, i32 addrspace(1)* %s + %s56 = load volatile i32, i32 addrspace(1)* %s + %s57 = load volatile i32, i32 addrspace(1)* %s + %s58 = load volatile i32, i32 addrspace(1)* %s + %s59 = load volatile i32, i32 addrspace(1)* %s + %s60 = load volatile i32, i32 addrspace(1)* %s + %s61 = load volatile i32, i32 addrspace(1)* %s + %s62 = load volatile i32, i32 addrspace(1)* %s + %s63 = load volatile i32, i32 addrspace(1)* %s + + %s64 = load volatile i32, i32 addrspace(1)* %s + %s65 = load volatile i32, i32 addrspace(1)* %s + %s66 = load volatile i32, i32 addrspace(1)* %s + %s67 = load volatile i32, i32 addrspace(1)* %s + %s68 = load volatile i32, i32 addrspace(1)* %s + %s69 = load volatile i32, i32 addrspace(1)* %s + %s70 = load volatile i32, i32 addrspace(1)* %s + %s71 = load volatile i32, i32 addrspace(1)* %s + %s72 = load volatile i32, i32 addrspace(1)* %s + %s73 = load volatile i32, i32 addrspace(1)* %s + %s74 = load volatile i32, i32 addrspace(1)* %s + %s75 = load volatile i32, i32 addrspace(1)* %s + %s76 = load volatile i32, i32 addrspace(1)* %s + %s77 = load volatile i32, i32 addrspace(1)* %s + %s78 = load volatile i32, i32 addrspace(1)* %s + %s79 = load volatile i32, i32 addrspace(1)* %s + %s80 = load volatile i32, i32 addrspace(1)* %s + 
%s81 = load volatile i32, i32 addrspace(1)* %s + %s82 = load volatile i32, i32 addrspace(1)* %s + %s83 = load volatile i32, i32 addrspace(1)* %s + %s84 = load volatile i32, i32 addrspace(1)* %s + %s85 = load volatile i32, i32 addrspace(1)* %s + %s86 = load volatile i32, i32 addrspace(1)* %s + %s87 = load volatile i32, i32 addrspace(1)* %s + %s88 = load volatile i32, i32 addrspace(1)* %s + %s89 = load volatile i32, i32 addrspace(1)* %s + %s90 = load volatile i32, i32 addrspace(1)* %s + %s91 = load volatile i32, i32 addrspace(1)* %s + %s92 = load volatile i32, i32 addrspace(1)* %s + %s93 = load volatile i32, i32 addrspace(1)* %s + %s94 = load volatile i32, i32 addrspace(1)* %s + %s95 = load volatile i32, i32 addrspace(1)* %s + + %s96 = load volatile i32, i32 addrspace(1)* %s + %s97 = load volatile i32, i32 addrspace(1)* %s + %s98 = load volatile i32, i32 addrspace(1)* %s + %s99 = load volatile i32, i32 addrspace(1)* %s + %s100 = load volatile i32, i32 addrspace(1)* %s + %s101 = load volatile i32, i32 addrspace(1)* %s + %s102 = load volatile i32, i32 addrspace(1)* %s + %s103 = load volatile i32, i32 addrspace(1)* %s + %s104 = load volatile i32, i32 addrspace(1)* %s + %s105 = load volatile i32, i32 addrspace(1)* %s + %s106 = load volatile i32, i32 addrspace(1)* %s + %s107 = load volatile i32, i32 addrspace(1)* %s + %s108 = load volatile i32, i32 addrspace(1)* %s + %s109 = load volatile i32, i32 addrspace(1)* %s + %s110 = load volatile i32, i32 addrspace(1)* %s + %s111 = load volatile i32, i32 addrspace(1)* %s + %s112 = load volatile i32, i32 addrspace(1)* %s + %s113 = load volatile i32, i32 addrspace(1)* %s + %s114 = load volatile i32, i32 addrspace(1)* %s + %s115 = load volatile i32, i32 addrspace(1)* %s + %s116 = load volatile i32, i32 addrspace(1)* %s + %s117 = load volatile i32, i32 addrspace(1)* %s + %s118 = load volatile i32, i32 addrspace(1)* %s + %s119 = load volatile i32, i32 addrspace(1)* %s + %s120 = load volatile i32, i32 addrspace(1)* %s + %s121 = load volatile i32, i32 addrspace(1)* %s + %s122 = load volatile i32, i32 addrspace(1)* %s + %s123 = load volatile i32, i32 addrspace(1)* %s + %s124 = load volatile i32, i32 addrspace(1)* %s + %s125 = load volatile i32, i32 addrspace(1)* %s + %s126 = load volatile i32, i32 addrspace(1)* %s + %s127 = load volatile i32, i32 addrspace(1)* %s + + %s128 = load volatile i32, i32 addrspace(1)* %s + %s129 = load volatile i32, i32 addrspace(1)* %s + %s130 = load volatile i32, i32 addrspace(1)* %s + %s131 = load volatile i32, i32 addrspace(1)* %s + %s132 = load volatile i32, i32 addrspace(1)* %s + + %cmp0 = icmp eq i32 %a, 0 + %cmp1 = icmp eq i32 %a, %b + %cmp2 = icmp eq i32 %a, %c + %cmp3 = icmp eq i32 %a, %d + %cmp4 = icmp eq i32 %a, %e + %cmp5 = icmp eq i32 %b, %c + %cmp6 = icmp eq i32 %b, %d + %cmp7 = icmp eq i32 %b, %e + %cmp8 = icmp eq i32 %b, 0 + %cmp9 = icmp eq i32 %c, 0 + %cmp10 = icmp eq i32 %d, 0 + %cmp11 = icmp eq i32 %e, 0 + + call void @llvm.HSAIL.barrier() #2 + + store volatile i1 %cmp0, i1 addrspace(1)* %out + store volatile i1 %cmp1, i1 addrspace(1)* %out + store volatile i1 %cmp2, i1 addrspace(1)* %out + store volatile i1 %cmp3, i1 addrspace(1)* %out + store volatile i1 %cmp4, i1 addrspace(1)* %out + store volatile i1 %cmp5, i1 addrspace(1)* %out + store volatile i1 %cmp6, i1 addrspace(1)* %out + store volatile i1 %cmp7, i1 addrspace(1)* %out + store volatile i1 %cmp8, i1 addrspace(1)* %out + store volatile i1 %cmp9, i1 addrspace(1)* %out + store volatile i1 %cmp10, i1 addrspace(1)* %out + store volatile i1 %cmp11, i1 
addrspace(1)* %out + + store volatile i32 %s0, i32 addrspace(1)* %s + store volatile i32 %s1, i32 addrspace(1)* %s + store volatile i32 %s2, i32 addrspace(1)* %s + store volatile i32 %s3, i32 addrspace(1)* %s + store volatile i32 %s4, i32 addrspace(1)* %s + store volatile i32 %s5, i32 addrspace(1)* %s + store volatile i32 %s6, i32 addrspace(1)* %s + store volatile i32 %s7, i32 addrspace(1)* %s + store volatile i32 %s8, i32 addrspace(1)* %s + store volatile i32 %s9, i32 addrspace(1)* %s + store volatile i32 %s10, i32 addrspace(1)* %s + store volatile i32 %s11, i32 addrspace(1)* %s + store volatile i32 %s12, i32 addrspace(1)* %s + store volatile i32 %s13, i32 addrspace(1)* %s + store volatile i32 %s14, i32 addrspace(1)* %s + store volatile i32 %s15, i32 addrspace(1)* %s + store volatile i32 %s16, i32 addrspace(1)* %s + store volatile i32 %s17, i32 addrspace(1)* %s + store volatile i32 %s18, i32 addrspace(1)* %s + store volatile i32 %s19, i32 addrspace(1)* %s + store volatile i32 %s20, i32 addrspace(1)* %s + store volatile i32 %s21, i32 addrspace(1)* %s + store volatile i32 %s22, i32 addrspace(1)* %s + store volatile i32 %s23, i32 addrspace(1)* %s + store volatile i32 %s24, i32 addrspace(1)* %s + store volatile i32 %s25, i32 addrspace(1)* %s + store volatile i32 %s26, i32 addrspace(1)* %s + store volatile i32 %s27, i32 addrspace(1)* %s + store volatile i32 %s28, i32 addrspace(1)* %s + store volatile i32 %s29, i32 addrspace(1)* %s + store volatile i32 %s30, i32 addrspace(1)* %s + store volatile i32 %s31, i32 addrspace(1)* %s + store volatile i32 %s32, i32 addrspace(1)* %s + store volatile i32 %s33, i32 addrspace(1)* %s + store volatile i32 %s34, i32 addrspace(1)* %s + store volatile i32 %s35, i32 addrspace(1)* %s + store volatile i32 %s36, i32 addrspace(1)* %s + store volatile i32 %s37, i32 addrspace(1)* %s + store volatile i32 %s38, i32 addrspace(1)* %s + store volatile i32 %s39, i32 addrspace(1)* %s + store volatile i32 %s40, i32 addrspace(1)* %s + store volatile i32 %s41, i32 addrspace(1)* %s + store volatile i32 %s42, i32 addrspace(1)* %s + store volatile i32 %s43, i32 addrspace(1)* %s + store volatile i32 %s44, i32 addrspace(1)* %s + store volatile i32 %s45, i32 addrspace(1)* %s + store volatile i32 %s46, i32 addrspace(1)* %s + store volatile i32 %s47, i32 addrspace(1)* %s + store volatile i32 %s48, i32 addrspace(1)* %s + store volatile i32 %s49, i32 addrspace(1)* %s + store volatile i32 %s50, i32 addrspace(1)* %s + store volatile i32 %s51, i32 addrspace(1)* %s + store volatile i32 %s52, i32 addrspace(1)* %s + store volatile i32 %s53, i32 addrspace(1)* %s + store volatile i32 %s54, i32 addrspace(1)* %s + store volatile i32 %s55, i32 addrspace(1)* %s + store volatile i32 %s56, i32 addrspace(1)* %s + store volatile i32 %s57, i32 addrspace(1)* %s + store volatile i32 %s58, i32 addrspace(1)* %s + store volatile i32 %s59, i32 addrspace(1)* %s + store volatile i32 %s60, i32 addrspace(1)* %s + store volatile i32 %s61, i32 addrspace(1)* %s + store volatile i32 %s62, i32 addrspace(1)* %s + store volatile i32 %s63, i32 addrspace(1)* %s + store volatile i32 %s64, i32 addrspace(1)* %s + store volatile i32 %s65, i32 addrspace(1)* %s + store volatile i32 %s66, i32 addrspace(1)* %s + store volatile i32 %s67, i32 addrspace(1)* %s + store volatile i32 %s68, i32 addrspace(1)* %s + store volatile i32 %s69, i32 addrspace(1)* %s + store volatile i32 %s70, i32 addrspace(1)* %s + store volatile i32 %s71, i32 addrspace(1)* %s + store volatile i32 %s72, i32 addrspace(1)* %s + store volatile i32 %s73, i32 
addrspace(1)* %s + store volatile i32 %s74, i32 addrspace(1)* %s + store volatile i32 %s75, i32 addrspace(1)* %s + store volatile i32 %s76, i32 addrspace(1)* %s + store volatile i32 %s77, i32 addrspace(1)* %s + store volatile i32 %s78, i32 addrspace(1)* %s + store volatile i32 %s79, i32 addrspace(1)* %s + store volatile i32 %s80, i32 addrspace(1)* %s + store volatile i32 %s81, i32 addrspace(1)* %s + store volatile i32 %s82, i32 addrspace(1)* %s + store volatile i32 %s83, i32 addrspace(1)* %s + store volatile i32 %s84, i32 addrspace(1)* %s + store volatile i32 %s85, i32 addrspace(1)* %s + store volatile i32 %s86, i32 addrspace(1)* %s + store volatile i32 %s87, i32 addrspace(1)* %s + store volatile i32 %s88, i32 addrspace(1)* %s + store volatile i32 %s89, i32 addrspace(1)* %s + store volatile i32 %s90, i32 addrspace(1)* %s + store volatile i32 %s91, i32 addrspace(1)* %s + store volatile i32 %s92, i32 addrspace(1)* %s + store volatile i32 %s93, i32 addrspace(1)* %s + store volatile i32 %s94, i32 addrspace(1)* %s + store volatile i32 %s95, i32 addrspace(1)* %s + store volatile i32 %s96, i32 addrspace(1)* %s + store volatile i32 %s97, i32 addrspace(1)* %s + store volatile i32 %s98, i32 addrspace(1)* %s + store volatile i32 %s99, i32 addrspace(1)* %s + store volatile i32 %s100, i32 addrspace(1)* %s + store volatile i32 %s101, i32 addrspace(1)* %s + store volatile i32 %s102, i32 addrspace(1)* %s + store volatile i32 %s103, i32 addrspace(1)* %s + store volatile i32 %s104, i32 addrspace(1)* %s + store volatile i32 %s105, i32 addrspace(1)* %s + store volatile i32 %s106, i32 addrspace(1)* %s + store volatile i32 %s107, i32 addrspace(1)* %s + store volatile i32 %s108, i32 addrspace(1)* %s + store volatile i32 %s109, i32 addrspace(1)* %s + store volatile i32 %s110, i32 addrspace(1)* %s + store volatile i32 %s111, i32 addrspace(1)* %s + store volatile i32 %s112, i32 addrspace(1)* %s + store volatile i32 %s113, i32 addrspace(1)* %s + store volatile i32 %s114, i32 addrspace(1)* %s + store volatile i32 %s115, i32 addrspace(1)* %s + store volatile i32 %s116, i32 addrspace(1)* %s + store volatile i32 %s117, i32 addrspace(1)* %s + store volatile i32 %s118, i32 addrspace(1)* %s + store volatile i32 %s119, i32 addrspace(1)* %s + store volatile i32 %s120, i32 addrspace(1)* %s + store volatile i32 %s121, i32 addrspace(1)* %s + store volatile i32 %s122, i32 addrspace(1)* %s + store volatile i32 %s123, i32 addrspace(1)* %s + store volatile i32 %s124, i32 addrspace(1)* %s + store volatile i32 %s125, i32 addrspace(1)* %s + store volatile i32 %s126, i32 addrspace(1)* %s + store volatile i32 %s127, i32 addrspace(1)* %s + store volatile i32 %s128, i32 addrspace(1)* %s + store volatile i32 %s129, i32 addrspace(1)* %s + store volatile i32 %s130, i32 addrspace(1)* %s + store volatile i32 %s131, i32 addrspace(1)* %s + store volatile i32 %s132, i32 addrspace(1)* %s + + br i1 %cmp0, label %bb, label %cc + +bb: + %and0 = and i1 %cmp1, %cmp2 + %and1 = and i1 %and0, %cmp3 + %and2 = and i1 %and1, %cmp4 + %and3 = and i1 %and2, %cmp5 + %and4 = and i1 %and3, %cmp6 + %and5 = and i1 %and4, %cmp7 + %and6 = and i1 %and5, %cmp8 + %and7 = and i1 %and6, %cmp8 + %and8 = and i1 %and7, %cmp9 + %and9 = and i1 %and8, %cmp10 + %and10 = and i1 %and9, %cmp11 + store volatile i1 %and10, i1 addrspace(1)* %out + ret void + +cc: + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind noduplicate } Index: test/CodeGen/HSAIL/spill-cond-reg.ll 
=================================================================== --- /dev/null +++ test/CodeGen/HSAIL/spill-cond-reg.ll @@ -0,0 +1,182 @@ +; RUN: llc -O0 -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_spill_cond_reg +; HSAIL: { +; HSAIL: align(4) spill_u8 %__spillStack[36]; +; HSAIL-DAG: cmp_eq_b1_s32 $c0 +; HSAIL-DAG: cmp_eq_b1_s32 $c1 +; HSAIL-DAG: cmp_eq_b1_s32 $c2 +; HSAIL-DAG: cmp_eq_b1_s32 $c3 +; HSAIL-DAG: cmp_eq_b1_s32 $c4 +; HSAIL-DAG: cmp_eq_b1_s32 $c5 +; HSAIL-DAG: cmp_eq_b1_s32 $c6 +; HSAIL-DAG: cmp_eq_b1_s32 $c7 +; HSAIL-DAG: cvt_u32_b1 [[CVT_C0:\$s[0-9]+]], $c0 +; HSAIL-DAG: cvt_u32_b1 [[CVT_C1:\$s[0-9]+]], $c1 +; HSAIL-DAG: cvt_u32_b1 [[CVT_C2:\$s[0-9]+]], $c2 +; HSAIL-DAG: cvt_u32_b1 [[CVT_C3:\$s[0-9]+]], $c3 +; HSAIL-DAG: cvt_u32_b1 [[CVT_C4:\$s[0-9]+]], $c4 +; HSAIL-DAG: cvt_u32_b1 [[CVT_C5:\$s[0-9]+]], $c5 +; HSAIL-DAG: cvt_u32_b1 [[CVT_C6:\$s[0-9]+]], $c6 +; HSAIL-DAG: cvt_u32_b1 [[CVT_C7:\$s[0-9]+]], $c7 +; HSAIL-DAG: st_spill_align(4)_u32 [[CVT_C0]], [%__spillStack]; +; HSAIL-DAG: st_spill_align(4)_u32 [[CVT_C1]], +; HSAIL-DAG: st_spill_align(4)_u32 [[CVT_C2]], +; HSAIL-DAG: st_spill_align(4)_u32 [[CVT_C3]], +; HSAIL-DAG: st_spill_align(4)_u32 [[CVT_C4]], +; HSAIL-DAG: st_spill_align(4)_u32 [[CVT_C5]], +; HSAIL-DAG: st_spill_align(4)_u32 [[CVT_C6]], +; HSAIL-DAG: st_spill_align(4)_u32 [[CVT_C7]], + +; HSAIL: br @BB0_1; + +; HSAIL: @BB0_1: +; HSAIL-DAG: ld_spill_align(4)_u32 [[RELOAD_C0:\$s[0-9]+]], [%__spillStack]; +; HSAIL-DAG: cvt_b1_u32 {{\$c[0-9]+}}, [[RELOAD_C0]] + +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][4]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][8]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][12]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][16]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][20]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][24]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][28]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][32]; + +; HSAIL-DAG: cvt_b1_u32 $c{{[0-9]+}}, $s{{[0-9]+}} +; HSAIL-DAG: cvt_b1_u32 $c{{[0-9]+}}, $s{{[0-9]+}} +; HSAIL-DAG: cvt_b1_u32 $c{{[0-9]+}}, $s{{[0-9]+}} +; HSAIL-DAG: cvt_b1_u32 $c{{[0-9]+}}, $s{{[0-9]+}} +; HSAIL-DAG: cvt_b1_u32 $c{{[0-9]+}}, $s{{[0-9]+}} +; HSAIL-DAG: cvt_b1_u32 $c{{[0-9]+}}, $s{{[0-9]+}} +; HSAIL-DAG: cvt_b1_u32 $c{{[0-9]+}}, $s{{[0-9]+}} + +; HSAIL-DAG: and_b1 +; HSAIL-DAG: and_b1 +; HSAIL-DAG: and_b1 +; HSAIL-DAG: and_b1 +; HSAIL-DAG: and_b1 +; HSAIL-DAG: and_b1 +; HSAIL-DAG: and_b1 +; HSAIL-DAG: cvt_s32_b1 [[RESULT:\$s[0-9]+]] +; HSAIL-DAG: st_global_u8 [[RESULT]] +; HSAIL: ret; + +; HSAIL: @BB0_2: +; HSAIL: ret; +define void @test_spill_cond_reg(i1 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %c_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 2 + %d_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 3 + %e_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 4 + %f_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 5 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %c = load i32, i32 addrspace(1)* %c_ptr + %d = load i32, i32 addrspace(1)* %d_ptr + %e = load i32, i32 addrspace(1)* %e_ptr + + %cmp0 = icmp eq i32 %a, 0 + %cmp1 = icmp eq i32 %a, %b + %cmp2 = 
icmp eq i32 %a, %c + %cmp3 = icmp eq i32 %a, %d + %cmp4 = icmp eq i32 %a, %e + %cmp5 = icmp eq i32 %b, %c + %cmp6 = icmp eq i32 %b, %d + %cmp7 = icmp eq i32 %b, %e + %cmp8 = icmp eq i32 %b, 0 + br i1 %cmp0, label %bb, label %cc + +bb: + %and0 = and i1 %cmp1, %cmp2 + %and1 = and i1 %and0, %cmp3 + %and2 = and i1 %and1, %cmp4 + %and3 = and i1 %and2, %cmp5 + %and4 = and i1 %and3, %cmp6 + %and5 = and i1 %and4, %cmp7 + %and6 = and i1 %and5, %cmp8 + store i1 %and6, i1 addrspace(1)* %out + ret void + +cc: + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_spill_cond_reg_priv(arg_u32 %test_spill_cond_reg_priv)( +; HSAIL: align(4) private_u8 %__privateStack[52]; +; HSAIL: align(4) spill_u8 %__spillStack[40]; + +; HSAIL: lda_private_u32 {{\$s[0-9]+}}, [%__privateStack]; +; HSAIL: st_private_align(4)_u32 123, [%__privateStack][{{\$s[0-9]+}}]; + +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][4]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][8]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][12]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][16]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][20]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][24]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][28]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][32]; +; HSAIL-DAG: st_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][36]; + +; HSAIL: @BB1_1: +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][36]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][20]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][28]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][24]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][32]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][16]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][12]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][8]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][4]; +; HSAIL-DAG: ld_spill_align(4)_u32 {{\$s[0-9]+}}, [%__spillStack][20]; +; HSAIL-DAG: ld_private_align(4)_u32 {{\$s[0-9]+}}, [{{\$s[0-9]+}}+12]; +; HSAIL: st_arg_align(4)_u32 +; HSAIL: ret; +define i32 @test_spill_cond_reg_priv(i1 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index0, i32 %index1) { + %alloca = alloca [13 x i32] + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %c_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 2 + %d_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 3 + %e_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 4 + %f_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 5 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %c = load i32, i32 addrspace(1)* %c_ptr + %d = load i32, i32 addrspace(1)* %d_ptr + %e = load i32, i32 addrspace(1)* %e_ptr + + %cmp0 = icmp eq i32 %a, 0 + %cmp1 = icmp eq i32 %a, %b + %cmp2 = icmp eq i32 %a, %c + %cmp3 = icmp eq i32 %a, %d + %cmp4 = icmp eq i32 %a, %e + %cmp5 = icmp eq i32 %b, %c + %cmp6 = icmp eq i32 %b, %d + %cmp7 = icmp eq i32 %b, %e + %cmp8 = icmp eq i32 %b, 0 + + %gep.0 = getelementptr [13 x i32], [13 x i32]* %alloca, i32 0, i32 %index0 + %gep.1 = getelementptr [13 x i32], [13 x i32]* %alloca, i32 0, i32 %index1 + 
store i32 123, i32* %gep.0 + br i1 %cmp0, label %bb, label %cc + +bb: + %and0 = and i1 %cmp1, %cmp2 + %and1 = and i1 %and0, %cmp3 + %and2 = and i1 %and1, %cmp4 + %and3 = and i1 %and2, %cmp5 + %and4 = and i1 %and3, %cmp6 + %and5 = and i1 %and4, %cmp7 + %and6 = and i1 %and5, %cmp8 + %gep.2 = getelementptr i32, i32* %gep.1, i32 3 + %load = load i32, i32* %gep.2 + store i1 %and6, i1 addrspace(1)* %out + ret i32 %load + +cc: + ret i32 0 +} Index: test/CodeGen/HSAIL/spill.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/spill.ll @@ -0,0 +1,32 @@ +; RUN: llc -O0 -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_spill_branch_add +; HSAIL: { +; HSAIL: align(4) spill_u8 %__spillStack[12]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL-DAG: ld_global_align(4)_u32 [[A:\$s[0-9]+]], {{\[}}[[IN]]{{\]}}; +; HSAIL-DAG: ld_global_align(4)_u32 [[B:\$s[0-9]+]], {{\[}}[[IN]]+4{{\]}}; +; HSAIL-DAG: st_spill_align(4)_u32 [[A]], [%__spillStack]; +; HSAIL-DAG: st_spill_align(4)_u32 [[B]], [%__spillStack][8]; +; HSAIL-DAG: st_spill_align(4)_u32 [[OUT]], [%__spillStack][4]; +; HSAIL: br @BB0_1; + +; HSAIL: @BB0_1: +; HSAIL-DAG: ld_spill_align(4)_u32 [[A_RELOAD:\$s[0-9]+]], [%__spillStack]; +; HSAIL-DAG: ld_spill_align(4)_u32 [[B_RELOAD:\$s[0-9]+]], [%__spillStack][8]; +; HSAIL-DAG: ld_spill_align(4)_u32 [[OUT_RELOAD:\$s[0-9]+]], [%__spillStack][4]; +; HSAIL-DAG: add_u32 [[RESULT:\$s[0-9]+]], [[A_RELOAD]], [[B_RELOAD]]; +; HSAIL: st_global_align(4)_u32 [[RESULT]], {{\[}}[[OUT_RELOAD]]{{\]}}; +; HSAIL: ret; +define void @test_spill_branch_add(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + br label %bb + +bb: + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/sra.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/sra.ll @@ -0,0 +1,73 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &ashr_v2i32 +; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1) * %in + %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr + %result = ashr <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ashr_v4i32 +; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1) * %in + %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr + %result = ashr <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function 
&ashr_i64 +; HSAIL: shr_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 8; +define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) { + %tmp0 = sext i32 %in to i64 + %tmp1 = ashr i64 %tmp0, 8 + store i64 %tmp1, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ashr_i64_2 +; HSAIL: shr_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 + %a = load i64, i64 addrspace(1) * %in + %b = load i64, i64 addrspace(1) * %b_ptr + %result = ashr i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ashr_v2i64 +; HSAIL: shr_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64>, <2 x i64> addrspace(1) * %in + %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr + %result = ashr <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &ashr_v4i64 +; HSAIL: shr_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64>, <4 x i64> addrspace(1) * %in + %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr + %result = ashr <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/srem.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/srem.ll @@ -0,0 +1,152 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &srem_i32( +; HSAIL: rem_s32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %num = load i32, i32 addrspace(1) * %in + %den = load i32, i32 addrspace(1) * %den_ptr + %result = srem i32 %num, %den + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_i32_4( +; HSAIL-NOT: rem_s32 +; HSAIL: ret; +define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32, i32 addrspace(1) * %in + %result = srem i32 %num, 4 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FIXME: Should use fast expansion +; HSAIL-LABEL: {{^}}prog function &srem_i32_7( +; XHSAIL: mulhi_s32 {{\$s[0-9]+}}, 2454267027 +; XHSAIL: mullo_s32 +; XHSAIL: sub_s32 + +; HSAIL-NOT: rem_s32 +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 18446744071868851347; +; HSAIL-NOT: rem_f32 +; HSAIL: ret; +define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32, i32 addrspace(1) * %in + %result = srem i32 %num, 7 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_v2i32( +; HSAIL: rem_s32 +; HSAIL: rem_s32 +define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %den_ptr = getelementptr <2 x i32>, <2 x i32> 
addrspace(1)* %in, i32 1 + %num = load <2 x i32>, <2 x i32> addrspace(1) * %in + %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr + %result = srem <2 x i32> %num, %den + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_v2i32_4( +; HSAIL-NOT: rem_s32 +; HSAIL: ret +define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %num = load <2 x i32>, <2 x i32> addrspace(1) * %in + %result = srem <2 x i32> %num, <i32 4, i32 4> + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_v4i32( +; HSAIL: rem_s32 +; HSAIL: rem_s32 +; HSAIL: rem_s32 +; HSAIL: rem_s32 +define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %num = load <4 x i32>, <4 x i32> addrspace(1) * %in + %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr + %result = srem <4 x i32> %num, %den + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_v4i32_4( +; HSAIL-NOT: rem_s32 +; HSAIL: ret; +define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %num = load <4 x i32>, <4 x i32> addrspace(1) * %in + %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4> + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_i64( +; HSAIL: rem_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 + %num = load i64, i64 addrspace(1) * %in + %den = load i64, i64 addrspace(1) * %den_ptr + %result = srem i64 %num, %den + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_i64_4( +; HSAIL-NOT: rem_s64 +; HSAIL: ret; +define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %num = load i64, i64 addrspace(1) * %in + %result = srem i64 %num, 4 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_v2i64( +; HSAIL: rem_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: rem_s64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 + %num = load <2 x i64>, <2 x i64> addrspace(1) * %in + %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr + %result = srem <2 x i64> %num, %den + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_v2i64_4( +; HSAIL-NOT: rem_s64 +; HSAIL: ret; +define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %num = load <2 x i64>, <2 x i64> addrspace(1) * %in + %result = srem <2 x i64> %num, <i64 4, i64 4> + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &srem_v4i64( +; HSAIL: rem_s64 +; HSAIL: rem_s64 +; HSAIL: rem_s64 +; HSAIL: rem_s64 +define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 + %num = load <4 x i64>, <4 x i64> addrspace(1) * %in + %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr + %result = srem <4 x i64> %num, %den + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog
function &srem_v4i64_4( +; HSAIL-NOT: rem_s64 +; HSAIL: ret; +define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %num = load <4 x i64>, <4 x i64> addrspace(1) * %in + %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4> + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/srl.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/srl.ll @@ -0,0 +1,75 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &lshr_i32 +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = lshr i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &lshr_v2i32 +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1) * %in + %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr + %result = lshr <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &lshr_v4i32 +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1) * %in + %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr + %result = lshr <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &lshr_i64 +; HSAIL: shr_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 + %a = load i64, i64 addrspace(1) * %in + %b = load i64, i64 addrspace(1) * %b_ptr + %result = lshr i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &lshr_v2i64 +; HSAIL: shr_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64>, <2 x i64> addrspace(1) * %in + %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr + %result = lshr <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &lshr_v4i64 +; HSAIL: shr_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: shr_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64>, <4 x i64>
addrspace(1)* %in, i64 1 + %a = load <4 x i64>, <4 x i64> addrspace(1) * %in + %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr + %result = lshr <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/stimm.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/stimm.ll @@ -0,0 +1,43 @@ +; XFAIL: * +; Check that immediate stores are possible, fp immediate stores are possible, and vector stores can mix immediates and registers +; RUN: llc < %s -march=hsail | FileCheck %s + +define void @__OpenCL_v4doubleimm_kernel(i32 %global_offset_0, i32 %global_offset_1, i32 %global_offset_2, <4 x double> addrspace(1)* nocapture %out) nounwind { +; CHECK-LABEL: {{^}}prog kernel &__OpenCL_v4doubleimm_kernel( +; CHECK: (0, 4607182418800017408, 4613937818241073152, 4616189618054758400) +entry: + store <4 x double> <double 0.0, double 1.0, double 3.0, double 4.0>, <4 x double> addrspace(1)* %out, align 32 + ret void +} + +define void @__OpenCL_v4imm_kernel(i32 %global_offset_0, i32 %global_offset_1, i32 %global_offset_2, <4 x i32> addrspace(1)* nocapture %out) nounwind { +; CHECK-LABEL: {{^}}prog kernel &__OpenCL_v4imm_kernel( +; CHECK: (0, 1, 3, 4) +entry: + store <4 x i32> <i32 0, i32 1, i32 3, i32 4>, <4 x i32> addrspace(1)* %out, align 16 + ret void +} + +define void @__OpenCL_v4mix_kernel(i32 %global_offset_0, i32 %global_offset_1, i32 %global_offset_2, <4 x i32> addrspace(3)* nocapture %out, i32 %x) nounwind { +; CHECK-LABEL: {{^}}prog kernel &__OpenCL_v4mix_kernel( +; CHECK: {{\(0, 1, \$s[0-9]+, 4\)}} +entry: + %tmp4 = insertelement <4 x i32> <i32 0, i32 1, i32 undef, i32 4>, i32 %x, i32 2 + store <4 x i32> %tmp4, <4 x i32> addrspace(3)* %out, align 16 + ret void +} + +define void @__OpenCL_trap_kernel(i64 %global_offset_0, i64 %global_offset_1, i64 %global_offset_2, i32 addrspace(1)* nocapture %pVal) nounwind { +; CHECK-LABEL: trap +; CHECK: {{\(42, \$s[0-9]+\)}} +entry: + %arrayidx2 = getelementptr i32, i32 addrspace(1)* %pVal, i64 1 + %tmp5 = load i32, i32 addrspace(1)* %arrayidx2, align 4 + %conv = sext i32 %tmp5 to i64 + %tmp6 = udiv i64 %conv, 13 + %conv7 = trunc i64 %tmp6 to i32 + %0 = insertelement <2 x i32> <i32 42, i32 undef>, i32 %conv7, i32 1 + %arrayidx_v4 = bitcast i32 addrspace(1)* %pVal to <2 x i32> addrspace(1)* + store <2 x i32> %0, <2 x i32> addrspace(1)* %arrayidx_v4, align 4 + ret void +} Index: test/CodeGen/HSAIL/store.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/store.ll @@ -0,0 +1,488 @@ +; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +;===------------------------------------------------------------------------===; +; Global Address Space +;===------------------------------------------------------------------------===; + +; FUNC-LABEL: {{^}}prog function &store_i1_const +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_global_u8 255, {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_i1_const(i1 addrspace(1)* %out) { + store i1 true, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_i1_cmp +; HSAIL-DAG: cmp_eq_b1_s32 [[COND:\$c[0-9]+]], {{\$s[0-9]+}}, 1234 +; HSAIL-DAG: cmov_b32 [[REG:\$s[0-9]+]], [[COND]], 4294967295, 0; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_global_u8 [[REG]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_i1_cmp(i1 addrspace(1)* %out, i32 %a) { + %cmp = icmp eq i32 %a, 1234 + store i1 %cmp, i1 addrspace(1)* %out + ret void +} + +; FIXME: This should be
using ld_arg_u8 +; FUNC-LABEL: {{^}}prog function &store_i1_arg +; HSAIL-DAG: ld_arg_u8 [[ARG:\$s[0-9]+]], [%arg]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL-DAG: and_b32 [[REG:\$s[0-9]+]], [[ARG]], 1; +; HSAIL: st_global_u8 [[REG]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_i1_arg(i1 addrspace(1)* %out, i1 %arg) { + store i1 %arg, i1 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_i8 +; HSAIL-DAG: ld_arg_u8 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_global_u8 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_i8(i8 addrspace(1)* %out, i8 %in) { + store i8 %in, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_i16 +; HSAIL-DAG: ld_arg_align(2)_u16 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_global_align(2)_u16 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_i16(i16 addrspace(1)* %out, i16 %in) { + store i16 %in, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_i32 +; HSAIL-DAG: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_global_align(4)_u32 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_i32(i32 addrspace(1)* %out, i32 %in) { + store i32 %in, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_imm_neg1_i32( +; HSAIL: st_global_align(4)_u32 4294967295, {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_imm_neg1_i32(i32 addrspace(1)* %out) { + store i32 -1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_v2i8 +define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) { + %tmp0 = trunc <2 x i32> %in to <2 x i8> + store <2 x i8> %tmp0, <2 x i8> addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}prog function &store_v2i16 +define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) { + %tmp0 = trunc <2 x i32> %in to <2 x i16> + store <2 x i16> %tmp0, <2 x i16> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_v4i8 +define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) { + %tmp0 = trunc <4 x i32> %in to <4 x i8> + store <4 x i8> %tmp0, <4 x i8> addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}prog function &store_v4i16 +define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) { + %tmp0 = trunc <4 x i32> %in to <4 x i16> + store <4 x i16> %tmp0, <4 x i16> addrspace(1)* %out + ret void +} + +; floating-point store + +; FUNC-LABEL: {{^}}prog function &store_f32( +; HSAIL-DAG: ld_arg_align(4)_f32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_global_align(4)_f32 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_f32(float addrspace(1)* %out, float %in) { + store float %in, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_f32_imm( +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_global_align(4)_u32 3212836864, {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_f32_imm(float addrspace(1)* %out) { + store float -1.0, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_f64 +; HSAIL-DAG: ld_arg_align(8)_f64 [[IN:\$d[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_global_align(8)_f64 [[IN]], {{\[}}[[OUT]]{{\]}} +; 
HSAIL: ret; +define void @store_f64(double addrspace(1)* %out, double %in) { + store double %in, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_v2f32 +define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) { + %tmp0 = insertelement <2 x float> zeroinitializer, float %a, i32 0 + %tmp1 = insertelement <2 x float> %tmp0, float %b, i32 1 + store <2 x float> %tmp1, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_v4f32 +define void @store_v4f32(<4 x float> addrspace(1)* %out, float %a, float %b, float %c, float %d) { + %tmp0 = insertelement <4 x float> zeroinitializer, float %a, i32 0 + %tmp1 = insertelement <4 x float> %tmp0, float %b, i32 1 + %tmp2 = insertelement <4 x float> %tmp1, float %c, i32 2 + %tmp3 = insertelement <4 x float> %tmp2, float %d, i32 3 + store <4 x float> %tmp3, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_v2f64 +define void @store_v2f64(<2 x double> addrspace(1)* %out, double %a, double %b) { + %tmp0 = insertelement <2 x double> zeroinitializer, double %a, i32 0 + %tmp1 = insertelement <2 x double> %tmp0, double %b, i32 1 + store <2 x double> %tmp1, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_v4f64 +define void @store_v4f64(<4 x double> addrspace(1)* %out, double %a, double %b, double %c, double %d) { + %tmp0 = insertelement <4 x double> zeroinitializer, double %a, i32 0 + %tmp1 = insertelement <4 x double> %tmp0, double %b, i32 1 + %tmp2 = insertelement <4 x double> %tmp1, double %c, i32 2 + %tmp3 = insertelement <4 x double> %tmp2, double %d, i32 3 + store <4 x double> %tmp3, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_v4i32 +define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) { + store <4 x i32> %in, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_i64_i8 +define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) { + %tmp0 = trunc i64 %in to i8 + store i8 %tmp0, i8 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_i64_i16 +define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) { + %tmp0 = trunc i64 %in to i16 + store i16 %tmp0, i16 addrspace(1)* %out + ret void +} + +;===------------------------------------------------------------------------===; +; Local Address Space +;===------------------------------------------------------------------------===; + +; FUNC-LABEL: {{^}}prog function &store_local_i1 +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_group_u8 255, {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_local_i1(i1 addrspace(3)* %out) { + store i1 true, i1 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_local_i8 +; HSAIL-DAG: ld_arg_u8 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_group_u8 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) { + store i8 %in, i8 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_local_i16 +; HSAIL-DAG: ld_arg_align(2)_u16 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_group_align(2)_u16 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) { + store i16 %in, i16 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function 
&store_local_i32 +; HSAIL-DAG: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_group_align(4)_u32 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_local_i32(i32 addrspace(3)* %out, i32 %in) { + store i32 %in, i32 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_local_v2i16 +define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) { + store <2 x i16> %in, <2 x i16> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_local_v4i8 +define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) { + store <4 x i8> %in, <4 x i8> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_local_v2i32 +define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) { + store <2 x i32> %in, <2 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_local_v4i32 +define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) { + store <4 x i32> %in, <4 x i32> addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_local_i64_i8 +define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) { + %tmp0 = trunc i64 %in to i8 + store i8 %tmp0, i8 addrspace(3)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_local_i64_i16 +define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) { + %tmp0 = trunc i64 %in to i16 + store i16 %tmp0, i16 addrspace(3)* %out + ret void +} + +;===------------------------------------------------------------------------===; +; Private Address Space +;===------------------------------------------------------------------------===; + +; FUNC-LABEL: {{^}}prog function &store_private_i1 +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_private_u8 255, {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_private_i1(i1* %out) { + store i1 true, i1* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_private_i8 +; HSAIL-DAG: ld_arg_u8 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_private_u8 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_private_i8(i8* %out, i8 %in) { + store i8 %in, i8* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_private_i16 +; HSAIL-DAG: ld_arg_align(2)_u16 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_private_align(2)_u16 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_private_i16(i16* %out, i16 %in) { + store i16 %in, i16* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_private_i32 +; HSAIL-DAG: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_private_align(4)_u32 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_private_i32(i32* %out, i32 %in) { + store i32 %in, i32* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_private_v2i16 +define void @store_private_v2i16(<2 x i16>* %out, <2 x i16> %in) { + store <2 x i16> %in, <2 x i16>* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_private_v4i8 +define void @store_private_v4i8(<4 x i8>* %out, <4 x i8> %in) { + store <4 x i8> %in, <4 x i8>* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_private_v2i32 +define void @store_private_v2i32(<2 x i32>* %out, <2 x i32> %in) { + store <2 x i32> %in, <2 x i32>* %out + ret void +} + +; 
FUNC-LABEL: {{^}}prog function &store_private_v4i32 +define void @store_private_v4i32(<4 x i32>* %out, <4 x i32> %in) { + store <4 x i32> %in, <4 x i32>* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_private_i64_i8 +define void @store_private_i64_i8(i8* %out, i64 %in) { + %tmp0 = trunc i64 %in to i8 + store i8 %tmp0, i8* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_private_i64_i16 +define void @store_private_i64_i16(i16* %out, i64 %in) { + %tmp0 = trunc i64 %in to i16 + store i16 %tmp0, i16* %out + ret void +} + +;===------------------------------------------------------------------------===; +; Flat Address Space +;===------------------------------------------------------------------------===; + +; FUNC-LABEL: {{^}}prog function &store_flat_i1 +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_u8 255, {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_flat_i1(i1 addrspace(4)* %out) { + store i1 true, i1 addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_i8 +; HSAIL-DAG: ld_arg_u8 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_u8 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_flat_i8(i8 addrspace(4)* %out, i8 %in) { + store i8 %in, i8 addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_i16 +; HSAIL-DAG: ld_arg_align(2)_u16 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_align(2)_u16 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_flat_i16(i16 addrspace(4)* %out, i16 %in) { + store i16 %in, i16 addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_i32 +; HSAIL-DAG: ld_arg_align(4)_u32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_align(4)_u32 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_flat_i32(i32 addrspace(4)* %out, i32 %in) { + store i32 %in, i32 addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_v2i16 +define void @store_flat_v2i16(<2 x i16> addrspace(4)* %out, <2 x i16> %in) { + store <2 x i16> %in, <2 x i16> addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_v4i8 +define void @store_flat_v4i8(<4 x i8> addrspace(4)* %out, <4 x i8> %in) { + store <4 x i8> %in, <4 x i8> addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_v2i32 +define void @store_flat_v2i32(<2 x i32> addrspace(4)* %out, <2 x i32> %in) { + store <2 x i32> %in, <2 x i32> addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_v4i32 +define void @store_flat_v4i32(<4 x i32> addrspace(4)* %out, <4 x i32> %in) { + store <4 x i32> %in, <4 x i32> addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_i64_i8 +define void @store_flat_i64_i8(i8 addrspace(4)* %out, i64 %in) { + %tmp0 = trunc i64 %in to i8 + store i8 %tmp0, i8 addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_i64_i16 +define void @store_flat_i64_i16(i16 addrspace(4)* %out, i64 %in) { + %tmp0 = trunc i64 %in to i16 + store i16 %tmp0, i16 addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_f32 +; HSAIL-DAG: ld_arg_align(4)_f32 [[IN:\$s[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_align(4)_f32 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_flat_f32(float addrspace(4)* 
%out, float %in) { + store float %in, float addrspace(4)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &store_flat_f64 +; HSAIL-DAG: ld_arg_align(8)_f64 [[IN:\$d[0-9]+]], [%in]; +; HSAIL-DAG: ld_arg_align(4)_u32 [[OUT:\$s[0-9]+]], [%out]; +; HSAIL: st_align(8)_f64 [[IN]], {{\[}}[[OUT]]{{\]}} +; HSAIL: ret; +define void @store_flat_f64(double addrspace(4)* %out, double %in) { + store double %in, double addrspace(4)* %out + ret void +} + +; The stores in this function are combined by the optimizer to create a +; 64-bit store with 32-bit alignment. This is legal here, and the legalizer +; should not try to split the 64-bit store back into two 32-bit stores. + +; FUNC-LABEL: {{^}}prog function &vecload2 +define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 { + %tmp0 = load i32, i32 addrspace(2)* %mem, align 4 + %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1 + %tmp1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4 + store i32 %tmp0, i32 addrspace(1)* %out, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 + store i32 %tmp1, i32 addrspace(1)* %arrayidx1, align 4 + ret void +} + +; When i128 was a legal type, this program generated 'cannot select' errors: +; FUNC-LABEL: {{^}}prog function &i128_const_store +define void @i128_const_store(i32 addrspace(1)* %out) { + store i32 1, i32 addrspace(1)* %out, align 4 + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 + store i32 1, i32 addrspace(1)* %arrayidx2, align 4 + %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2 + store i32 2, i32 addrspace(1)* %arrayidx4, align 4 + %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3 + store i32 2, i32 addrspace(1)* %arrayidx6, align 4 + ret void +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } Index: test/CodeGen/HSAIL/sub.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/sub.ll @@ -0,0 +1,90 @@ +;RUN: llc -march=hsail -filetype=asm < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +declare i32 @llvm.HSAIL.get.global.id(i32) readnone + +; FUNC-LABEL: {{^}}prog function &test_sub_i32 +; HSAIL: sub_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = sub i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_sub_v2i32 +; HSAIL: sub_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: sub_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1)* %in + %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr + %result = sub <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_sub_v4i32 +; HSAIL: sub_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: sub_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: sub_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}},
{{\$s[0-9]+}}; +; HSAIL: sub_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1)* %in + %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr + %result = sub <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &s_sub_i64 +; HSAIL: sub_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &v_sub_i64 +; HSAIL: shl_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, 3; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: add_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: sub_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { + %tid = call i32 @llvm.HSAIL.get.global.id(i32 0) readnone + %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid + %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid + %a = load i64, i64 addrspace(1)* %a_ptr + %b = load i64, i64 addrspace(1)* %b_ptr + %result = sub i64 %a, %b + store i64 %result, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_sub_v2i64 +; HSAIL: sub_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: sub_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64>, <2 x i64> addrspace(1)* %in + %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr + %result = sub <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_sub_v4i64 +; HSAIL: sub_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: sub_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: sub_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: sub_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64>, <4 x i64> addrspace(1)* %in + %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr + %result = sub <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/trunc-store-i1.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/trunc-store-i1.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &global_truncstore_i32_to_i1( +; HSAIL: ld_arg_align(4)_u32 [[VAL:\$s[0-9]+]] +; HSAIL: and_b32 [[AND:\$s[0-9]+]], [[VAL]], 1; +; HSAIL: st_global_u8 [[AND]] +define void @global_truncstore_i32_to_i1(i1 addrspace(1)* %out, i32 %val) nounwind { + %trunc = trunc i32 %val to i1 + store i1 %trunc, i1 addrspace(1)* %out, align 1 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &global_truncstore_i64_to_i1( +; HSAIL: ld_arg_align(8)_u64 [[VAL:\$d[0-9]+]] +; HSAIL: and_b64 [[AND:\$d[0-9]+]], [[VAL]], 1; +; HSAIL: cvt_b1_u64
[[CVT0:\$c[0-9]+]], [[AND]]; +; HSAIL: cvt_s32_b1 [[CVT1:\$s[0-9]+]], [[CVT0]]; +; HSAIL: st_global_u8 [[CVT1]] +define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwind { + %trunc = trunc i64 %val to i1 + store i1 %trunc, i1 addrspace(1)* %out, align 1 + ret void +} + +; SI-LABEL: {{^}}prog function &global_truncstore_i16_to_i1( +; HSAIL: ld_arg_align(2)_u16 [[VAL:\$s[0-9]+]] +; HSAIL: and_b32 [[AND:\$s[0-9]+]], [[VAL]], 1; +; HSAIL: st_global_u8 [[AND]] +define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind { + %trunc = trunc i16 %val to i1 + store i1 %trunc, i1 addrspace(1)* %out, align 1 + ret void +} Index: test/CodeGen/HSAIL/trunc.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/trunc.ll @@ -0,0 +1,66 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL %s + +; HSAIL-LABEL: {{^}}prog function &trunc_i64_to_i32_store( +; HSAIL: ld_arg_align(8)_u64 [[ARG:\$d[0-9]]], [%in]; +; HSAIL: cvt_u32_u64 [[RESULT:\$s[0-9]+]], [[ARG]]; +; HSAIL: st_global_align(4)_u32 [[RESULT]] +define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) { + %result = trunc i64 %in to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &trunc_load_shl_i64( +; HSAIL: ld_arg_align(8)_u64 [[ARG:\$d[0-9]]], [%a]; +; HSAIL: shl_u64 [[SHL:\$d[0-9]+]], [[ARG]], 2; +; HSAIL: cvt_u32_u64 [[RESULT:\$s[0-9]+]], [[SHL]]; +; HSAIL: st_global_align(4)_u32 [[RESULT]] +define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) { + %b = shl i64 %a, 2 + %result = trunc i64 %b to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &trunc_shl_i64( +define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) { + %aa = add i64 %a, 234 ; Prevent shrinking store. 
+ %b = shl i64 %aa, 2 + %result = trunc i64 %b to i32 + store i32 %result, i32 addrspace(1)* %out, align 4 + store i64 %b, i64 addrspace(1)* %out2, align 8 ; Prevent reducing ops to 32-bits + ret void +} + +; HSAIL-LABEL: {{^}}prog function &trunc_i32_to_i1( +define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) { + %a = load i32, i32 addrspace(1)* %ptr, align 4 + %trunc = trunc i32 %a to i1 + %result = select i1 %trunc, i32 1, i32 0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &sgpr_trunc_i32_to_i1( +; HSAIL: ld_arg_align(4)_u32 [[ARG:\$s[0-9]]], [%a]; +; HSAIL: and_b32 [[AND:\$s[0-9]+]], [[ARG]], 1; +; HSAIL: cvt_b1_u32 [[CVT:\$c[0-9]+]], [[AND]]; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, [[CVT]] +define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) { + %trunc = trunc i32 %a to i1 + %result = select i1 %trunc, i32 1, i32 0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} + +; HSAIL-LABEL: {{^}}prog function &sgpr_trunc_i64_to_i1( +; HSAIL: ld_arg_align(8)_u64 [[ARG:\$d[0-9]]], [%a]; +; HSAIL: and_b64 [[AND:\$d[0-9]+]], [[ARG]], 1; +; HSAIL: cvt_b1_u64 [[CVT:\$c[0-9]+]], [[AND]]; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, [[CVT]] +define void @sgpr_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %a) { + %trunc = trunc i64 %a to i1 + %result = select i1 %trunc, i32 1, i32 0 + store i32 %result, i32 addrspace(1)* %out, align 4 + ret void +} Index: test/CodeGen/HSAIL/uint_to_fp.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/uint_to_fp.ll @@ -0,0 +1,97 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_i32_to_f32 +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) { + %result = uitofp i32 %in to float + store float %result, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_v2i32_to_v2f32 +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) { + %result = uitofp <2 x i32> %in to <2 x float> + store <2 x float> %result, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_v4i32_to_v4f32 +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f32_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32>, <4 x i32> addrspace(1) * %in + %result = uitofp <4 x i32> %value to <4 x float> + store <4 x float> %result, <4 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_i64_to_f32 +; HSAIL: cvt_f32_u64 {{\$s[0-9]+}}, {{\$d[0-9]+}}; +define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) { + %tmp0 = uitofp i64 %in to float + store float %tmp0, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_i1_to_f32 +; HSAIL: cmp_eq_b1_s32 [[CMP:\$c[0-9]+]], {{\$s[0-9]+}}, 0; +; HSAIL: cmov_b32 [[RESULT:\$s[0-9]+]], [[CMP]], 1065353216, 0; +; HSAIL: st_global_align(4)_u32 [[RESULT]] +define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = uitofp i1 
%cmp to float + store float %fp, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_i32_to_f64 +; HSAIL: cvt_f64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @uint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) { + %result = uitofp i32 %in to double + store double %result, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_v2i32_to_v2f64 +; HSAIL: cvt_f64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i32> %in) { + %result = uitofp <2 x i32> %in to <2 x double> + store <2 x double> %result, <2 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_v4i32_to_v4f64 +; HSAIL: cvt_f64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_f64_u32 {{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %value = load <4 x i32>, <4 x i32> addrspace(1) * %in + %result = uitofp <4 x i32> %value to <4 x double> + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_i64_to_f64 +; HSAIL: cvt_f64_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) { + %tmp0 = uitofp i64 %in to double + store double %tmp0, double addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &uint_to_fp_i1_to_f64 +; HSAIL: cmp_eq_b1_s32 [[CMP:\$c[0-9]+]], {{\$s[0-9]+}}, 0; +; HSAIL: cmov_b64 [[RESULT:\$d[0-9]+]], [[CMP]], 4607182418800017408, 0; +; HSAIL: st_global_align(8)_u64 [[RESULT]] +define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) { + %cmp = icmp eq i32 %in, 0 + %fp = uitofp i1 %cmp to double + store double %fp, double addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/unbound-label-exception.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/unbound-label-exception.ll @@ -0,0 +1,22 @@ +; RUN: llc -march=hsail64 -disable-branch-analysis < %s +; RUN: llc -O0 -march=hsail64 -disable-branch-analysis < %s + +@__xxxxyyzzzzqqq = addrspace(2) constant [128 x <2 x float>] [<2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x 
float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> , <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer, <2 x float> zeroinitializer], align 8 + + +define float @foo(float %x) #0 { +bb.a: + br i1 undef, label %bb.c, label %bb.b + +bb.b: + br i1 undef, label %bb.c, label %bb.c + +bb.c: + ret float undef +} + +define float @Z(float %arg) #0 { + ret float undef +} + +attributes #0 = { nounwind } Index: test/CodeGen/HSAIL/urem.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/urem.ll @@ -0,0 +1,87 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_urem_i32 +; HSAIL: rem_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 + %a = load i32, i32 addrspace(1)* %in + %b = load i32, i32 addrspace(1)* %b_ptr + %result = urem i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; HSAIL-LABEL: {{^}}prog function &test_urem_i32_7( +; HSAIL-NOT: rem_u32 +; HSAIL: mul_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, 613566757; +; HSAIL: ret; +define void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32, i32 addrspace(1) * %in + %result = urem i32 %num, 7 + store i32 %result, i32 addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}prog function &test_urem_v2i32 +; HSAIL: rem_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: rem_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 + %a = load <2 x i32>, <2 x i32> addrspace(1)* %in + %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr + %result = urem <2 x i32> %a, %b + store <2 x i32> %result, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_urem_v4i32 +; HSAIL: rem_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: rem_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: rem_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: rem_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 + %a = load <4 x i32>, <4 x i32> addrspace(1)* %in + %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr + %result = urem <4 x i32> %a, %b + store <4 x i32> %result, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_urem_i64 +; HSAIL: rem_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 + %a = load i64, i64 addrspace(1)* %in + %b = load i64, i64 addrspace(1)* 
%b_ptr + %result = urem i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_urem_v2i64 +; HSAIL: rem_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: rem_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 + %a = load <2 x i64>, <2 x i64> addrspace(1)* %in + %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr + %result = urem <2 x i64> %a, %b + store <2 x i64> %result, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &test_urem_v4i64 +; HSAIL: rem_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: rem_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: rem_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: rem_u64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 + %a = load <4 x i64>, <4 x i64> addrspace(1)* %in + %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr + %result = urem <4 x i64> %a, %b + store <4 x i64> %result, <4 x i64> addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/v1i64-kernel-arg.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/v1i64-kernel-arg.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &kernel_arg_i64 +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%a]; +; HSAIL-NEXT: st_global_align(8)_u64 +; HSAIL-NEXT: ret; +define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { + store i64 %a, i64 addrspace(1)* %out, align 8 + ret void +} + +; FUNC-LABEL: {{^}}prog function &kernel_arg_v1i64 +; HSAIL: ld_arg_align(8)_u64 {{\$d[0-9]+}}, [%a]; +; HSAIL-NEXT: st_global_align(8)_u64 +; HSAIL-NEXT: ret; +define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind { + store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8 + ret void +} Index: test/CodeGen/HSAIL/v_cndmask.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/v_cndmask.ll @@ -0,0 +1,11 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &v_cnd_nan +; HSAIL: cmp_ne_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, 4294967295, {{\$s[0-9]+}}; +define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) { + %tmp0 = icmp ne i32 %c, 0 + %tmp1 = select i1 %tmp0, float 0xFFFFFFFFE0000000, float %f + store float %tmp1, float addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/vselect64.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/vselect64.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test_select_v4i64 +; HSAIL: cmp_ne_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL: cmov_b64 {{\$d[0-9]+}}, {{\$c[0-9]+}}, 0, 4; +; HSAIL: cmp_ne_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL: cmov_b64 {{\$d[0-9]+}}, {{\$c[0-9]+}}, 1, 5; +; HSAIL: cmp_ne_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0; +; HSAIL: cmov_b64 {{\$d[0-9]+}}, {{\$c[0-9]+}}, 2, 6; +; HSAIL: cmp_ne_b1_s32 {{\$c[0-9]+}}, 
{{\$s[0-9]+}}, 0;
+; HSAIL: cmov_b64 {{\$d[0-9]+}}, {{\$c[0-9]+}}, 3, 7;
+define void @test_select_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> %c) {
+  %cmp = icmp ne <4 x i32> %c, <i32 0, i32 0, i32 0, i32 0>
+  %result = select <4 x i1> %cmp, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> <i64 4, i64 5, i64 6, i64 7>
+  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
Index: test/CodeGen/HSAIL/work-item-intrinsics.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/work-item-intrinsics.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=hsail -verify-machineinstrs < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s
+
+; Tests for out-of-range and non-constant work-item dimension operands.
+
+; FUNC-LABEL: {{^}}prog function &tidig_out_of_range
+; HSAIL-NOT: workitemabsid_u32
+; HSAIL: ret;
+define void @tidig_out_of_range(i32 addrspace(1)* %out) #1 {
+  %tmp0 = call i32 @llvm.HSAIL.get.global.id(i32 3) #0
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+; Undefined behavior: the dimension operand is not a constant.
+; FUNC-LABEL: {{^}}prog function &tidig_reg
+; HSAIL-NOT: workitemabsid
+; HSAIL: ret;
+define void @tidig_reg(i32 addrspace(1)* %out, i32 %dim) #1 {
+  %tmp0 = call i32 @llvm.HSAIL.get.global.id(i32 %dim) #0
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.HSAIL.get.global.id(i32) #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
Index: test/CodeGen/HSAIL/xor.ll
===================================================================
--- /dev/null
+++ test/CodeGen/HSAIL/xor.ll
@@ -0,0 +1,142 @@
+; XFAIL: *
+; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}prog function &xor_v2i32
+; HSAIL: xor_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: xor_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in0
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %in1
+  %result = xor <2 x i32> %a, %b
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &xor_v4i32
+; HSAIL: xor_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: xor_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: xor_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+; HSAIL: xor_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in0
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %in1
+  %result = xor <4 x i32> %a, %b
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &xor_i1
+; HSAIL: cmp_ge_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000;
+; HSAIL: cmp_ge_ftz_b1_f32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, 0F00000000;
+; HSAIL: xor_b1 {{\$c[0-9]+}}, {{\$c[0-9]+}}, {{\$c[0-9]+}};
+; HSAIL: cmov_b32 {{\$s[0-9]+}}, {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+  %a = load float, float addrspace(1) * %in0
+  %b = load float, float addrspace(1) * %in1
+  %acmp = fcmp oge float %a, 0.000000e+00
+  %bcmp = fcmp oge float %b, 0.000000e+00
+  %xor = xor i1 %acmp, %bcmp
+  %result = select i1 %xor, float %a, float %b
+  store float %result, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}prog function &vector_xor_i32
+; HSAIL: xor_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}};
+define void 
@vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) { + %a = load i32, i32 addrspace(1)* %in0 + %b = load i32, i32 addrspace(1)* %in1 + %result = xor i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &scalar_xor_i32 +; HSAIL: xor_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %result = xor i32 %a, %b + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &scalar_not_i32 +; HSAIL: not_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) { + %result = xor i32 %a, -1 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_not_i32 +; HSAIL: not_b32 {{\$s[0-9]+}}, {{\$s[0-9]+}}; +define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) { + %a = load i32, i32 addrspace(1)* %in0 + %b = load i32, i32 addrspace(1)* %in1 + %result = xor i32 %a, -1 + store i32 %result, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_xor_i64 +; HSAIL: xor_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) { + %a = load i64, i64 addrspace(1)* %in0 + %b = load i64, i64 addrspace(1)* %in1 + %result = xor i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &scalar_xor_i64 +; HSAIL: xor_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) { + %result = xor i64 %a, %b + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &scalar_not_i64 +; HSAIL: not_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) { + %result = xor i64 %a, -1 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &vector_not_i64 +; HSAIL: not_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}; +define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) { + %a = load i64, i64 addrspace(1)* %in0 + %b = load i64, i64 addrspace(1)* %in1 + %result = xor i64 %a, -1 + store i64 %result, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &xor_cf +; HSAIL: cmp_eq_b1_s64 {{\$c[0-9]+}}, {{\$d[0-9]+}}, 0; +; HSAIL-NEXT: cbr_b1 {{\$c[0-9]+}}, @BB11_1; +; HSAIL: br @BB11_3 +; HSAIL: {{^@BB11_1:}} +; HSAIL: xor_b64 {{\$d[0-9]+}}, {{\$d[0-9]+}}, {{\$d[0-9]+}}; +; HSAIL: {{^@BB11_3:}} +; HSAIL:st_global_u64 {{\$d[0-9]+}}, [{{\$s[0-9]+}}]; +; HSAIL: ret; +define void @xor_cf(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b) { +entry: + %0 = icmp eq i64 %a, 0 + br i1 %0, label %if, label %else + +if: + %1 = xor i64 %a, %b + br label %endif + +else: + %2 = load i64, i64 addrspace(1)* %in + br label %endif + +endif: + %3 = phi i64 [%1, %if], [%2, %else] + store i64 %3, i64 addrspace(1)* %out + ret void +} Index: test/CodeGen/HSAIL/zero_extend.ll =================================================================== --- /dev/null +++ test/CodeGen/HSAIL/zero_extend.ll @@ -0,0 +1,35 @@ +; XFAIL: * +; RUN: llc -march=hsail < %s | FileCheck -check-prefix=HSAIL -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}prog function &test +; HSAIL: mad_u32 {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_u64_u32 
{{\$d[0-9]+}}, {{\$s[0-9]+}}; +define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { +entry: + %tmp0 = mul i32 %a, %b + %tmp1 = add i32 %tmp0, %c + %tmp2 = zext i32 %tmp1 to i64 + store i64 %tmp2, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &testi1toi32 +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_u32_b1 {{\$s[0-9]+}}, {{\$c[0-9]+}}; +define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) { +entry: + %tmp0 = icmp eq i32 %a, %b + %tmp1 = zext i1 %tmp0 to i32 + store i32 %tmp1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}prog function &zext_i1_to_i64 +; HSAIL: cmp_eq_b1_s32 {{\$c[0-9]+}}, {{\$s[0-9]+}}, {{\$s[0-9]+}}; +; HSAIL: cvt_u64_b1 {{\$d[0-9]+}}, {{\$c[0-9]+}}; +define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind { + %cmp = icmp eq i32 %a, %b + %ext = zext i1 %cmp to i64 + store i64 %ext, i64 addrspace(1)* %out, align 8 + ret void +}