Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F15139727
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 MB
Subscribers
None
View Options
This file is larger than 256 KB, so syntax highlighting was skipped.
diff git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 3293c29a05fb..1d80e25c35a8 100644
 a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ 1,199 +1,199 @@
//===- ValueTypes.td - ValueType definitions ---------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//======//
//
// Value types - These values correspond to the register types defined in the
// MachineValueTypes.h file. If you update anything here, you must update it
// there as well!
//
//======//
// Base record for one machine value type.
//   size  - stored in the Size field (the defs in this file pass the type's
//           width in bits, e.g. 32 for i32).
//   value - stored in the Value field; the header comment of this file says
//           these numbers must be kept in sync with MachineValueTypes.h.
class ValueType<int size, int value> {
string Namespace = "MVT";
int Size = size;
int Value = value;
}
def OtherVT: ValueType<0 , 1>; // "Other" value
def i1 : ValueType<1 , 2>; // One bit boolean value
def i8 : ValueType<8 , 3>; // 8bit integer value
def i16 : ValueType<16 , 4>; // 16bit integer value
def i32 : ValueType<32 , 5>; // 32bit integer value
def i64 : ValueType<64 , 6>; // 64bit integer value
def i128 : ValueType<128, 7>; // 128bit integer value
def f16 : ValueType<16 , 8>; // 16bit floating point value
def f32 : ValueType<32 , 9>; // 32bit floating point value
def f64 : ValueType<64 , 10>; // 64bit floating point value
def f80 : ValueType<80 , 11>; // 80bit floating point value
def f128 : ValueType<128, 12>; // 128bit floating point value
def ppcf128: ValueType<128, 13>; // PPC 128bit floating point value
def v1i1 : ValueType<1 , 14>; // 1 x i1 vector value
def v2i1 : ValueType<2 , 15>; // 2 x i1 vector value
def v4i1 : ValueType<4 , 16>; // 4 x i1 vector value
def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value
def v16i1 : ValueType<16, 18>; // 16 x i1 vector value
def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value
def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value
def v128i1 : ValueType<128, 21>; // 128 x i1 vector value
def v512i1 : ValueType<512, 22>; // 512 x i1 vector value
def v1024i1: ValueType<1024,23>; //1024 x i1 vector value
def v1i8 : ValueType<8, 24>; // 1 x i8 vector value
def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value
def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value
def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value
def v16i8 : ValueType<128, 28>; // 16 x i8 vector value
def v32i8 : ValueType<256, 29>; // 32 x i8 vector value
def v64i8 : ValueType<512, 30>; // 64 x i8 vector value
def v128i8 : ValueType<1024,31>; //128 x i8 vector value
def v256i8 : ValueType<2048,32>; //256 x i8 vector value
def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value
def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value
def v3i16 : ValueType<48 , 35>; // 3 x i16 vector value
def v4i16 : ValueType<64 , 36>; // 4 x i16 vector value
def v8i16 : ValueType<128, 37>; // 8 x i16 vector value
def v16i16 : ValueType<256, 38>; // 16 x i16 vector value
def v32i16 : ValueType<512, 39>; // 32 x i16 vector value
def v64i16 : ValueType<1024,40>; // 64 x i16 vector value
def v128i16: ValueType<2048,41>; //128 x i16 vector value
def v1i32 : ValueType<32 , 42>; // 1 x i32 vector value
def v2i32 : ValueType<64 , 43>; // 2 x i32 vector value
def v3i32 : ValueType<96 , 44>; // 3 x i32 vector value
def v4i32 : ValueType<128, 45>; // 4 x i32 vector value
def v5i32 : ValueType<160, 46>; // 5 x i32 vector value
def v8i32 : ValueType<256, 47>; // 8 x i32 vector value
def v16i32 : ValueType<512, 48>; // 16 x i32 vector value
def v32i32 : ValueType<1024,49>; // 32 x i32 vector value
def v64i32 : ValueType<2048,50>; // 64 x i32 vector value
def v128i32 : ValueType<4096,51>; // 128 x i32 vector value
def v256i32 : ValueType<8192,52>; // 256 x i32 vector value (256 * 32 = 8192 bits)
def v512i32 : ValueType<16384,53>; // 512 x i32 vector value
def v1024i32 : ValueType<32768,54>; // 1024 x i32 vector value
def v2048i32 : ValueType<65536,55>; // 2048 x i32 vector value
def v1i64 : ValueType<64 , 56>; // 1 x i64 vector value
def v2i64 : ValueType<128, 57>; // 2 x i64 vector value
def v4i64 : ValueType<256, 58>; // 4 x i64 vector value
def v8i64 : ValueType<512, 59>; // 8 x i64 vector value
def v16i64 : ValueType<1024,60>; // 16 x i64 vector value
def v32i64 : ValueType<2048,61>; // 32 x i64 vector value
def v1i128 : ValueType<128, 62>; // 1 x i128 vector value
def nxv1i1 : ValueType<1, 63>; // n x 1 x i1 vector value
def nxv2i1 : ValueType<2, 64>; // n x 2 x i1 vector value
def nxv4i1 : ValueType<4, 65>; // n x 4 x i1 vector value
def nxv8i1 : ValueType<8, 66>; // n x 8 x i1 vector value
def nxv16i1 : ValueType<16, 67>; // n x 16 x i1 vector value
def nxv32i1 : ValueType<32, 68>; // n x 32 x i1 vector value

def nxv1i8 : ValueType<8, 69>; // n x 1 x i8 vector value
def nxv2i8 : ValueType<16, 70>; // n x 2 x i8 vector value
def nxv4i8 : ValueType<32, 71>; // n x 4 x i8 vector value
def nxv8i8 : ValueType<64, 72>; // n x 8 x i8 vector value
def nxv16i8 : ValueType<128, 73>; // n x 16 x i8 vector value
def nxv32i8 : ValueType<256, 74>; // n x 32 x i8 vector value

def nxv1i16 : ValueType<16, 75>; // n x 1 x i16 vector value
def nxv2i16 : ValueType<32, 76>; // n x 2 x i16 vector value
def nxv4i16 : ValueType<64, 77>; // n x 4 x i16 vector value
def nxv8i16 : ValueType<128, 78>; // n x 8 x i16 vector value
def nxv16i16: ValueType<256, 79>; // n x 16 x i16 vector value
def nxv32i16: ValueType<512, 80>; // n x 32 x i16 vector value

def nxv1i32 : ValueType<32, 81>; // n x 1 x i32 vector value
def nxv2i32 : ValueType<64, 82>; // n x 2 x i32 vector value
def nxv4i32 : ValueType<128, 83>; // n x 4 x i32 vector value
def nxv8i32 : ValueType<256, 84>; // n x 8 x i32 vector value
def nxv16i32: ValueType<512, 85>; // n x 16 x i32 vector value
def nxv32i32: ValueType<1024,86>; // n x 32 x i32 vector value

def nxv1i64 : ValueType<64, 87>; // n x 1 x i64 vector value
def nxv2i64 : ValueType<128, 88>; // n x 2 x i64 vector value
def nxv4i64 : ValueType<256, 89>; // n x 4 x i64 vector value
def nxv8i64 : ValueType<512, 90>; // n x 8 x i64 vector value
def nxv16i64: ValueType<1024,91>; // n x 16 x i64 vector value
def nxv32i64: ValueType<2048,92>; // n x 32 x i64 vector value

def v2f16 : ValueType<32 , 93>; // 2 x f16 vector value
def v3f16 : ValueType<48 , 94>; // 3 x f16 vector value
def v4f16 : ValueType<64 , 95>; // 4 x f16 vector value
def v8f16 : ValueType<128, 96>; // 8 x f16 vector value
def v16f16 : ValueType<256, 97>; // 8 x f16 vector value
def v32f16 : ValueType<512, 98>; // 8 x f16 vector value
def v1f32 : ValueType<32 , 99>; // 1 x f32 vector value
def v2f32 : ValueType<64 , 100>; // 2 x f32 vector value
def v3f32 : ValueType<96 , 101>; // 3 x f32 vector value
def v4f32 : ValueType<128, 102>; // 4 x f32 vector value
def v5f32 : ValueType<160, 103>; // 5 x f32 vector value
def v8f32 : ValueType<256, 104>; // 8 x f32 vector value
def v16f32 : ValueType<512, 105>; // 16 x f32 vector value
def v32f32 : ValueType<1024, 106>; // 32 x f32 vector value
def v64f32 : ValueType<2048, 107>; // 64 x f32 vector value
def v128f32 : ValueType<4096, 108>; // 128 x f32 vector value
def v256f32 : ValueType<8182, 109>; // 256 x f32 vector value
def v512f32 : ValueType<16384, 110>; // 512 x f32 vector value
def v1024f32 : ValueType<32768, 111>; // 1024 x f32 vector value
def v2048f32 : ValueType<65536, 112>; // 2048 x f32 vector value
def v1f64 : ValueType<64, 113>; // 1 x f64 vector value
def v2f64 : ValueType<128, 114>; // 2 x f64 vector value
def v4f64 : ValueType<256, 115>; // 4 x f64 vector value
def v8f64 : ValueType<512, 116>; // 8 x f64 vector value
+def v2f16 : ValueType<32 , 63>; // 2 x f16 vector value
+def v3f16 : ValueType<48 , 64>; // 3 x f16 vector value
+def v4f16 : ValueType<64 , 65>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 66>; // 8 x f16 vector value
+def v16f16 : ValueType<256, 67>; // 16 x f16 vector value
+def v32f16 : ValueType<512, 68>; // 32 x f16 vector value
+def v1f32 : ValueType<32 , 69>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 70>; // 2 x f32 vector value
+def v3f32 : ValueType<96 , 71>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 72>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 73>; // 5 x f32 vector value
+def v8f32 : ValueType<256, 74>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 75>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 76>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 77>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 78>; // 128 x f32 vector value
+def v256f32 : ValueType<8192, 79>; // 256 x f32 vector value (256 * 32 = 8192 bits)
+def v512f32 : ValueType<16384, 80>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 81>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 82>; // 2048 x f32 vector value
+def v1f64 : ValueType<64, 83>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 84>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 85>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 86>; // 8 x f64 vector value
+
+def nxv1i1 : ValueType<1, 87>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 88>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 89>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 90>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 91>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 92>; // n x 32 x i1 vector value
+
+def nxv1i8 : ValueType<8, 93>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 94>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 95>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 96>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 97>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 98>; // n x 32 x i8 vector value
+
+def nxv1i16 : ValueType<16, 99>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 100>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 101>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 102>; // n x 8 x i16 vector value
+def nxv16i16: ValueType<256, 103>; // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 104>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32, 105>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 106>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 107>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 108>; // n x 8 x i32 vector value
+def nxv16i32: ValueType<512, 109>; // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,110>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64, 111>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 112>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 113>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 114>; // n x 8 x i64 vector value
+def nxv16i64: ValueType<1024,115>; // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,116>; // n x 32 x i64 vector value
def nxv2f16 : ValueType<32 , 117>; // n x 2 x f16 vector value
def nxv4f16 : ValueType<64 , 118>; // n x 4 x f16 vector value
def nxv8f16 : ValueType<128, 119>; // n x 8 x f16 vector value
def nxv1f32 : ValueType<32 , 120>; // n x 1 x f32 vector value
def nxv2f32 : ValueType<64 , 121>; // n x 2 x f32 vector value
def nxv4f32 : ValueType<128, 122>; // n x 4 x f32 vector value
def nxv8f32 : ValueType<256, 123>; // n x 8 x f32 vector value
def nxv16f32 : ValueType<512, 124>; // n x 16 x f32 vector value
def nxv1f64 : ValueType<64, 125>; // n x 1 x f64 vector value
def nxv2f64 : ValueType<128, 126>; // n x 2 x f64 vector value
def nxv4f64 : ValueType<256, 127>; // n x 4 x f64 vector value
def nxv8f64 : ValueType<512, 128>; // n x 8 x f64 vector value
def x86mmx : ValueType<64 , 129>; // X86 MMX value
def FlagVT : ValueType<0 , 130>; // PreRA sched glue
def isVoid : ValueType<0 , 131>; // Produces no value
def untyped: ValueType<8 , 132>; // Produces an untyped value
def exnref: ValueType<0, 133>; // WebAssembly's exnref type
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata
// Pseudo valuetype mapped to the current pointer size to any address space.
// Should only be used in TableGen.
def iPTRAny : ValueType<0, 250>;
// Pseudo valuetype to represent "vector of any size"
def vAny : ValueType<0 , 251>;
// Pseudo valuetype to represent "float of any format"
def fAny : ValueType<0 , 252>;
// Pseudo valuetype to represent "integer of any bit width"
def iAny : ValueType<0 , 253>;
// Pseudo valuetype mapped to the current pointer size.
def iPTR : ValueType<0 , 254>;
// Pseudo valuetype to represent "any type of any size".
def Any : ValueType<0 , 255>;
/// This class is for targets that want to use pointer types in patterns
/// with the GlobalISelEmitter. Targets must define their own pointer
/// derived from this class. The scalar argument should be an
/// integer type with the same bit size as the pointer.
/// e.g. def p0 : PtrValueType <i64, 0>;
///
/// The record inherits Size and Value from the scalar type and additionally
/// records the address space number in AddrSpace.
class PtrValueType <ValueType scalar, int addrspace> :
ValueType<scalar.Size, scalar.Value> {
int AddrSpace = addrspace;
}
diff git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index f15475b118bd..ae3079ceb84f 100644
 a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ 1,1133 +1,1159 @@
//===- Support/MachineValueType.h - Machine-Level types ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//======//
//
// This file defines the set of machine-level target independent types which
// legal values in the code generator use.
//
//======//
#ifndef LLVM_SUPPORT_MACHINEVALUETYPE_H
#define LLVM_SUPPORT_MACHINEVALUETYPE_H
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScalableSize.h"
#include <cassert>
namespace llvm {
class Type;
/// Machine Value Type. Every type that is supported natively by some
/// processor targeted by LLVM occurs here. This means that any legal value
/// type can be represented by an MVT.
class MVT {
public:
enum SimpleValueType : uint8_t {
// Simple value types that aren't explicitly part of this enumeration
// are considered extended value types.
INVALID_SIMPLE_VALUE_TYPE = 0,
// If you change this numbering, you must change the values in
// ValueTypes.td as well!
Other = 1, // This is a nonstandard value
i1 = 2, // This is a 1 bit integer value
i8 = 3, // This is an 8 bit integer value
i16 = 4, // This is a 16 bit integer value
i32 = 5, // This is a 32 bit integer value
i64 = 6, // This is a 64 bit integer value
i128 = 7, // This is a 128 bit integer value
FIRST_INTEGER_VALUETYPE = i1,
LAST_INTEGER_VALUETYPE = i128,
f16 = 8, // This is a 16 bit floating point value
f32 = 9, // This is a 32 bit floating point value
f64 = 10, // This is a 64 bit floating point value
f80 = 11, // This is a 80 bit floating point value
f128 = 12, // This is a 128 bit floating point value
ppcf128 = 13, // This is a PPC 128bit floating point value
FIRST_FP_VALUETYPE = f16,
LAST_FP_VALUETYPE = ppcf128,
v1i1 = 14, // 1 x i1
v2i1 = 15, // 2 x i1
v4i1 = 16, // 4 x i1
v8i1 = 17, // 8 x i1
v16i1 = 18, // 16 x i1
v32i1 = 19, // 32 x i1
v64i1 = 20, // 64 x i1
v128i1 = 21, // 128 x i1
v512i1 = 22, // 512 x i1
v1024i1 = 23, // 1024 x i1
v1i8 = 24, // 1 x i8
v2i8 = 25, // 2 x i8
v4i8 = 26, // 4 x i8
v8i8 = 27, // 8 x i8
v16i8 = 28, // 16 x i8
v32i8 = 29, // 32 x i8
v64i8 = 30, // 64 x i8
v128i8 = 31, //128 x i8
v256i8 = 32, //256 x i8
v1i16 = 33, // 1 x i16
v2i16 = 34, // 2 x i16
v3i16 = 35, // 3 x i16
v4i16 = 36, // 4 x i16
v8i16 = 37, // 8 x i16
v16i16 = 38, // 16 x i16
v32i16 = 39, // 32 x i16
v64i16 = 40, // 64 x i16
v128i16 = 41, //128 x i16
v1i32 = 42, // 1 x i32
v2i32 = 43, // 2 x i32
v3i32 = 44, // 3 x i32
v4i32 = 45, // 4 x i32
v5i32 = 46, // 5 x i32
v8i32 = 47, // 8 x i32
v16i32 = 48, // 16 x i32
v32i32 = 49, // 32 x i32
v64i32 = 50, // 64 x i32
v128i32 = 51, // 128 x i32
v256i32 = 52, // 256 x i32
v512i32 = 53, // 512 x i32
v1024i32 = 54, // 1024 x i32
v2048i32 = 55, // 2048 x i32
v1i64 = 56, // 1 x i64
v2i64 = 57, // 2 x i64
v4i64 = 58, // 4 x i64
v8i64 = 59, // 8 x i64
v16i64 = 60, // 16 x i64
v32i64 = 61, // 32 x i64
v1i128 = 62, // 1 x i128
 // Scalable integer types
 nxv1i1 = 63, // n x 1 x i1
 nxv2i1 = 64, // n x 2 x i1
 nxv4i1 = 65, // n x 4 x i1
 nxv8i1 = 66, // n x 8 x i1
 nxv16i1 = 67, // n x 16 x i1
 nxv32i1 = 68, // n x 32 x i1

 nxv1i8 = 69, // n x 1 x i8
 nxv2i8 = 70, // n x 2 x i8
 nxv4i8 = 71, // n x 4 x i8
 nxv8i8 = 72, // n x 8 x i8
 nxv16i8 = 73, // n x 16 x i8
 nxv32i8 = 74, // n x 32 x i8

 nxv1i16 = 75, // n x 1 x i16
 nxv2i16 = 76, // n x 2 x i16
 nxv4i16 = 77, // n x 4 x i16
 nxv8i16 = 78, // n x 8 x i16
 nxv16i16 = 79, // n x 16 x i16
 nxv32i16 = 80, // n x 32 x i16

 nxv1i32 = 81, // n x 1 x i32
 nxv2i32 = 82, // n x 2 x i32
 nxv4i32 = 83, // n x 4 x i32
 nxv8i32 = 84, // n x 8 x i32
 nxv16i32 = 85, // n x 16 x i32
 nxv32i32 = 86, // n x 32 x i32

 nxv1i64 = 87, // n x 1 x i64
 nxv2i64 = 88, // n x 2 x i64
 nxv4i64 = 89, // n x 4 x i64
 nxv8i64 = 90, // n x 8 x i64
 nxv16i64 = 91, // n x 16 x i64
 nxv32i64 = 92, // n x 32 x i64

 FIRST_INTEGER_VECTOR_VALUETYPE = v1i1,
 LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64,

 FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1,
 LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64,

 v2f16 = 93, // 2 x f16
 v3f16 = 94, // 3 x f16
 v4f16 = 95, // 4 x f16
 v8f16 = 96, // 8 x f16
 v16f16 = 97, // 16 x f16
 v32f16 = 98, // 32 x f16
 v1f32 = 99, // 1 x f32
 v2f32 = 100, // 2 x f32
 v3f32 = 101, // 3 x f32
 v4f32 = 102, // 4 x f32
 v5f32 = 103, // 5 x f32
 v8f32 = 104, // 8 x f32
 v16f32 = 105, // 16 x f32
 v32f32 = 106, // 32 x f32
 v64f32 = 107, // 64 x f32
 v128f32 = 108, // 128 x f32
 v256f32 = 109, // 256 x f32
 v512f32 = 110, // 512 x f32
 v1024f32 = 111, // 1024 x f32
 v2048f32 = 112, // 2048 x f32
 v1f64 = 113, // 1 x f64
 v2f64 = 114, // 2 x f64
 v4f64 = 115, // 4 x f64
 v8f64 = 116, // 8 x f64
+ FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
+ LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
+
+ v2f16 = 63, // 2 x f16
+ v3f16 = 64, // 3 x f16
+ v4f16 = 65, // 4 x f16
+ v8f16 = 66, // 8 x f16
+ v16f16 = 67, // 16 x f16
+ v32f16 = 68, // 32 x f16
+ v1f32 = 69, // 1 x f32
+ v2f32 = 70, // 2 x f32
+ v3f32 = 71, // 3 x f32
+ v4f32 = 72, // 4 x f32
+ v5f32 = 73, // 5 x f32
+ v8f32 = 74, // 8 x f32
+ v16f32 = 75, // 16 x f32
+ v32f32 = 76, // 32 x f32
+ v64f32 = 77, // 64 x f32
+ v128f32 = 78, // 128 x f32
+ v256f32 = 79, // 256 x f32
+ v512f32 = 80, // 512 x f32
+ v1024f32 = 81, // 1024 x f32
+ v2048f32 = 82, // 2048 x f32
+ v1f64 = 83, // 1 x f64
+ v2f64 = 84, // 2 x f64
+ v4f64 = 85, // 4 x f64
+ v8f64 = 86, // 8 x f64
+
+ FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16,
+ LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v8f64,
+
+ FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
+ LAST_FIXEDLEN_VECTOR_VALUETYPE = v8f64,
+
+ nxv1i1 = 87, // n x 1 x i1
+ nxv2i1 = 88, // n x 2 x i1
+ nxv4i1 = 89, // n x 4 x i1
+ nxv8i1 = 90, // n x 8 x i1
+ nxv16i1 = 91, // n x 16 x i1
+ nxv32i1 = 92, // n x 32 x i1
+
+ nxv1i8 = 93, // n x 1 x i8
+ nxv2i8 = 94, // n x 2 x i8
+ nxv4i8 = 95, // n x 4 x i8
+ nxv8i8 = 96, // n x 8 x i8
+ nxv16i8 = 97, // n x 16 x i8
+ nxv32i8 = 98, // n x 32 x i8
+
+ nxv1i16 = 99, // n x 1 x i16
+ nxv2i16 = 100, // n x 2 x i16
+ nxv4i16 = 101, // n x 4 x i16
+ nxv8i16 = 102, // n x 8 x i16
+ nxv16i16 = 103, // n x 16 x i16
+ nxv32i16 = 104, // n x 32 x i16
+
+ nxv1i32 = 105, // n x 1 x i32
+ nxv2i32 = 106, // n x 2 x i32
+ nxv4i32 = 107, // n x 4 x i32
+ nxv8i32 = 108, // n x 8 x i32
+ nxv16i32 = 109, // n x 16 x i32
+ nxv32i32 = 110, // n x 32 x i32
+
+ nxv1i64 = 111, // n x 1 x i64
+ nxv2i64 = 112, // n x 2 x i64
+ nxv4i64 = 113, // n x 4 x i64
+ nxv8i64 = 114, // n x 8 x i64
+ nxv16i64 = 115, // n x 16 x i64
+ nxv32i64 = 116, // n x 32 x i64
+
+ FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
+ LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
nxv2f16 = 117, // n x 2 x f16
nxv4f16 = 118, // n x 4 x f16
nxv8f16 = 119, // n x 8 x f16
nxv1f32 = 120, // n x 1 x f32
nxv2f32 = 121, // n x 2 x f32
nxv4f32 = 122, // n x 4 x f32
nxv8f32 = 123, // n x 8 x f32
nxv16f32 = 124, // n x 16 x f32
nxv1f64 = 125, // n x 1 x f64
nxv2f64 = 126, // n x 2 x f64
nxv4f64 = 127, // n x 4 x f64
nxv8f64 = 128, // n x 8 x f64
 FIRST_FP_VECTOR_VALUETYPE = v2f16,
 LAST_FP_VECTOR_VALUETYPE = nxv8f64,
+ FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv2f16,
+ LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
 FIRST_FP_SCALABLE_VALUETYPE = nxv2f16,
 LAST_FP_SCALABLE_VALUETYPE = nxv8f64,
+ FIRST_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
+ LAST_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
x86mmx = 129, // This is an X86 MMX value
Glue = 130, // This glues nodes together during preRA sched
isVoid = 131, // This has no value
Untyped = 132, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
exnref = 133, // WebAssembly's exnref type
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
LAST_VALUETYPE = 134, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
// This value must be a multiple of 32.
MAX_ALLOWED_VALUETYPE = 160,
// A value of type llvm::TokenTy
token = 248,
// This is MDNode or MDString.
Metadata = 249,
// An int value the size of the pointer of the current
// target to any address space. This must only be used internal to
// tblgen. Other than for overloading, we treat iPTRAny the same as iPTR.
iPTRAny = 250,
// A vector with any length and element size. This is used
// for intrinsics that have overloadings based on vector types.
// This is only for tblgen's consumption!
vAny = 251,
// Any floatingpoint or vector floatingpoint value. This is used
// for intrinsics that have overloadings based on floatingpoint types.
// This is only for tblgen's consumption!
fAny = 252,
// An integer or vector integer value of any bit width. This is
// used for intrinsics that have overloadings based on integer bit widths.
// This is only for tblgen's consumption!
iAny = 253,
// An int value the size of the pointer of the current
// target. This should only be used internal to tblgen!
iPTR = 254,
// Any type. This is used for intrinsics that have overloadings.
// This is only for tblgen's consumption!
Any = 255
};
SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE;
constexpr MVT() = default;
constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {}
bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; }
bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; }
bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; }
bool operator!=(const MVT& S) const { return SimpleTy != S.SimpleTy; }
bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; }
bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; }
/// Report whether SimpleTy names a simple value type, i.e. lies in the
/// half-open range [FIRST_VALUETYPE, LAST_VALUETYPE).
bool isValid() const {
  if (SimpleTy < MVT::FIRST_VALUETYPE)
    return false;
  return SimpleTy < MVT::LAST_VALUETYPE;
}
/// Return true if this is a FP or a vector FP type.
bool isFloatingPoint() const {
return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE &&
SimpleTy <= MVT::LAST_FP_VALUETYPE) 
 (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE &&
 SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE));
+ (SimpleTy >= MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE) 
+ (SimpleTy >= MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE));
}
/// Return true if this is an integer or a vector integer type.
bool isInteger() const {
return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) 
 (SimpleTy >= MVT::FIRST_INTEGER_VECTOR_VALUETYPE &&
 SimpleTy <= MVT::LAST_INTEGER_VECTOR_VALUETYPE));
+ (SimpleTy >= MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE) 
+ (SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE));
}
/// Report whether this is a scalar integer type; integer vectors are excluded
/// because only the [FIRST_INTEGER_VALUETYPE, LAST_INTEGER_VALUETYPE] range
/// is consulted.
bool isScalarInteger() const {
  const bool AtOrAboveFirst = SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE;
  const bool AtOrBelowLast = SimpleTy <= MVT::LAST_INTEGER_VALUETYPE;
  return AtOrAboveFirst && AtOrBelowLast;
}
/// Report whether SimpleTy falls inside the closed range
/// [FIRST_VECTOR_VALUETYPE, LAST_VECTOR_VALUETYPE].
bool isVector() const {
  if (SimpleTy < MVT::FIRST_VECTOR_VALUETYPE)
    return false;
  if (SimpleTy > MVT::LAST_VECTOR_VALUETYPE)
    return false;
  return true;
}
/// Return true if this is a vector value type where the
/// runtime length is machine dependent
bool isScalableVector() const {
 return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE &&
 SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) 
 (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE &&
 SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE));
+ return (SimpleTy >= MVT::FIRST_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_SCALABLE_VECTOR_VALUETYPE);
+ }
+
+ bool isFixedLengthVector() const {
+ return (SimpleTy >= MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
}
/// Return true if this is a 16-bit vector type.
/// Only the vector types enumerated below are recognised.
bool is16BitVector() const {
  return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 ||
          SimpleTy == MVT::v16i1);
}
/// Return true if this is a 32-bit vector type.
/// Only the vector types enumerated below are recognised.
bool is32BitVector() const {
  return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 ||
          SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 ||
          SimpleTy == MVT::v2f16 || SimpleTy == MVT::v1f32);
}
/// Return true if this is a 64-bit vector type.
/// Only the vector types enumerated below are recognised.
bool is64BitVector() const {
  return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 ||
          SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 ||
          SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 ||
          SimpleTy == MVT::v2f32 || SimpleTy == MVT::v1f64);
}
/// Return true if this is a 128-bit vector type.
/// Only the vector types enumerated below are recognised.
bool is128BitVector() const {
  return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 ||
          SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 ||
          SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 ||
          SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 ||
          SimpleTy == MVT::v2f64);
}
/// Return true if this is a 256-bit vector type.
/// Only the vector types enumerated below are recognised.
bool is256BitVector() const {
  return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
          SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 ||
          SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
          SimpleTy == MVT::v4i64);
}
/// Return true if this is a 512-bit vector type.
/// Only the vector types enumerated below are recognised.
bool is512BitVector() const {
  return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
          SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 ||
          SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
          SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
}
/// Return true if this is a 1024-bit vector type.
/// Only the vector types enumerated below are recognised.
bool is1024BitVector() const {
  return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 ||
          SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 ||
          SimpleTy == MVT::v16i64);
}
/// Return true if this is a 2048-bit vector type.
/// Only the vector types enumerated below are recognised.
bool is2048BitVector() const {
  return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 ||
          SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64);
}
/// Return true if this is an overloaded type for TableGen, i.e. one of the
/// pseudo types Any, iAny, fAny, vAny or iPTRAny.
bool isOverloaded() const {
  return (SimpleTy == MVT::Any ||
          SimpleTy == MVT::iAny || SimpleTy == MVT::fAny ||
          SimpleTy == MVT::vAny || SimpleTy == MVT::iPTRAny);
}
/// Build the vector type that keeps this type's element type but has half as
/// many elements. Asserts that the (minimum) element count is even.
MVT getHalfNumVectorElementsVT() const {
  MVT ElemTy = getVectorElementType();
  auto Count = getVectorElementCount();
  assert((Count.Min & 1) == 0 && "Splitting vector, but not in half!");
  auto HalfCount = Count / 2;
  return getVectorVT(ElemTy, HalfCount);
}
/// Returns true if the number of elements of this vector type is a power
/// of 2.
bool isPow2VectorType() const {
  unsigned NElts = getVectorNumElements();
  // A power of two has exactly one bit set, so clearing the lowest set bit
  // (NElts & (NElts - 1)) must leave zero.
  return !(NElts & (NElts - 1));
}
/// Round this vector type's element count up to the next power of 2 and
/// return the resulting type; a type whose count is already a power of 2 is
/// returned unchanged.
MVT getPow2VectorType() const {
  if (!isPow2VectorType()) {
    unsigned NumElts = getVectorNumElements();
    unsigned RoundedUp = 1 << Log2_32_Ceil(NumElts);
    return MVT::getVectorVT(getVectorElementType(), RoundedUp);
  }
  return *this;
}
/// For a vector type yield its element type; any other type is returned
/// as-is.
MVT getScalarType() const {
  if (isVector())
    return getVectorElementType();
  return *this;
}
/// Return the scalar element type of this vector type. Fixed-length (vN...)
/// and scalable (nxvN...) vectors with the same element share one case
/// group. Any SimpleTy not listed below reaches the default case and hits
/// llvm_unreachable.
MVT getVectorElementType() const {
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
// Vectors of i1.
case v1i1:
case v2i1:
case v4i1:
case v8i1:
case v16i1:
case v32i1:
case v64i1:
case v128i1:
case v512i1:
case v1024i1:
case nxv1i1:
case nxv2i1:
case nxv4i1:
case nxv8i1:
case nxv16i1:
case nxv32i1: return i1;
// Vectors of i8.
case v1i8:
case v2i8:
case v4i8:
case v8i8:
case v16i8:
case v32i8:
case v64i8:
case v128i8:
case v256i8:
case nxv1i8:
case nxv2i8:
case nxv4i8:
case nxv8i8:
case nxv16i8:
case nxv32i8: return i8;
// Vectors of i16.
case v1i16:
case v2i16:
case v3i16:
case v4i16:
case v8i16:
case v16i16:
case v32i16:
case v64i16:
case v128i16:
case nxv1i16:
case nxv2i16:
case nxv4i16:
case nxv8i16:
case nxv16i16:
case nxv32i16: return i16;
// Vectors of i32.
case v1i32:
case v2i32:
case v3i32:
case v4i32:
case v5i32:
case v8i32:
case v16i32:
case v32i32:
case v64i32:
case v128i32:
case v256i32:
case v512i32:
case v1024i32:
case v2048i32:
case nxv1i32:
case nxv2i32:
case nxv4i32:
case nxv8i32:
case nxv16i32:
case nxv32i32: return i32;
// Vectors of i64.
case v1i64:
case v2i64:
case v4i64:
case v8i64:
case v16i64:
case v32i64:
case nxv1i64:
case nxv2i64:
case nxv4i64:
case nxv8i64:
case nxv16i64:
case nxv32i64: return i64;
// The single i128 vector type.
case v1i128: return i128;
// Vectors of f16.
case v2f16:
case v3f16:
case v4f16:
case v8f16:
case v16f16:
case v32f16:
case nxv2f16:
case nxv4f16:
case nxv8f16: return f16;
// Vectors of f32.
case v1f32:
case v2f32:
case v3f32:
case v4f32:
case v5f32:
case v8f32:
case v16f32:
case v32f32:
case v64f32:
case v128f32:
case v256f32:
case v512f32:
case v1024f32:
case v2048f32:
case nxv1f32:
case nxv2f32:
case nxv4f32:
case nxv8f32:
case nxv16f32: return f32;
// Vectors of f64.
case v1f64:
case v2f64:
case v4f64:
case v8f64:
case nxv1f64:
case nxv2f64:
case nxv4f64:
case nxv8f64: return f64;
}
}
/// Return the element count encoded in this vector type's name. For the
/// scalable nxvN... types the value returned is N, the same number as for the
/// fixed-length vN... type in the same case group. Any SimpleTy not listed
/// below reaches the default case and hits llvm_unreachable.
unsigned getVectorNumElements() const {
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
// Cases are grouped by element count, largest first.
case v2048i32:
case v2048f32: return 2048;
case v1024i1:
case v1024i32:
case v1024f32: return 1024;
case v512i1:
case v512i32:
case v512f32: return 512;
case v256i8:
case v256i32:
case v256f32: return 256;
case v128i1:
case v128i8:
case v128i16:
case v128i32:
case v128f32: return 128;
case v64i1:
case v64i8:
case v64i16:
case v64i32:
case v64f32: return 64;
case v32i1:
case v32i8:
case v32i16:
case v32i32:
case v32i64:
case v32f16:
case v32f32:
case nxv32i1:
case nxv32i8:
case nxv32i16:
case nxv32i32:
case nxv32i64: return 32;
case v16i1:
case v16i8:
case v16i16:
case v16i32:
case v16i64:
case v16f16:
case v16f32:
case nxv16i1:
case nxv16i8:
case nxv16i16:
case nxv16i32:
case nxv16i64:
case nxv16f32: return 16;
case v8i1:
case v8i8:
case v8i16:
case v8i32:
case v8i64:
case v8f16:
case v8f32:
case v8f64:
case nxv8i1:
case nxv8i8:
case nxv8i16:
case nxv8i32:
case nxv8i64:
case nxv8f16:
case nxv8f32:
case nxv8f64: return 8;
case v5i32:
case v5f32: return 5;
case v4i1:
case v4i8:
case v4i16:
case v4i32:
case v4i64:
case v4f16:
case v4f32:
case v4f64:
case nxv4i1:
case nxv4i8:
case nxv4i16:
case nxv4i32:
case nxv4i64:
case nxv4f16:
case nxv4f32:
case nxv4f64: return 4;
case v3i16:
case v3i32:
case v3f16:
case v3f32: return 3;
case v2i1:
case v2i8:
case v2i16:
case v2i32:
case v2i64:
case v2f16:
case v2f32:
case v2f64:
case nxv2i1:
case nxv2i8:
case nxv2i16:
case nxv2i32:
case nxv2i64:
case nxv2f16:
case nxv2f32:
case nxv2f64: return 2;
case v1i1:
case v1i8:
case v1i16:
case v1i32:
case v1i64:
case v1i128:
case v1f32:
case v1f64:
case nxv1i1:
case nxv1i8:
case nxv1i16:
case nxv1i32:
case nxv1i64:
case nxv1f32:
case nxv1f64: return 1;
}
}
/// Bundle the element count from getVectorNumElements() with the
/// isScalableVector() flag into an ElementCount.
ElementCount getVectorElementCount() const {
  unsigned MinElts = getVectorNumElements();
  bool Scalable = isScalableVector();
  return { MinElts, Scalable };
}
/// Return the size in bits recorded for this simple value type.
///
/// Types without a fixed size (Other, iPTR, the overloaded *Any types, token,
/// Metadata) and any SimpleTy not listed reach llvm_unreachable with a message
/// naming the reason. Each nxv* enumerator is grouped with, and returns the
/// same number as, the fixed-length type of the same element type and N.
unsigned getSizeInBits() const {
switch (SimpleTy) {
default:
llvm_unreachable("getSizeInBits called on extended MVT.");
case Other:
llvm_unreachable("Value type is nonstandard value, Other.");
case iPTR:
llvm_unreachable("Value type size is targetdependent. Ask TLI.");
case iPTRAny:
case iAny:
case fAny:
case vAny:
case Any:
llvm_unreachable("Value type is overloaded.");
case token:
llvm_unreachable("Token type is a sentinel that cannot be used "
"in codegen and has no size");
case Metadata:
llvm_unreachable("Value type is metadata.");
case i1:
case v1i1:
case nxv1i1: return 1;
case v2i1:
case nxv2i1: return 2;
case v4i1:
case nxv4i1: return 4;
case i8 :
case v1i8:
case v8i1:
case nxv1i8:
case nxv8i1: return 8;
case i16 :
case f16:
case v16i1:
case v2i8:
case v1i16:
case nxv16i1:
case nxv2i8:
case nxv1i16: return 16;
case f32 :
case i32 :
case v32i1:
case v4i8:
case v2i16:
case v2f16:
case v1f32:
case v1i32:
case nxv32i1:
case nxv4i8:
case nxv2i16:
case nxv1i32:
case nxv2f16:
case nxv1f32: return 32;
case v3i16:
case v3f16: return 48;
case x86mmx:
case f64 :
case i64 :
case v64i1:
case v8i8:
case v4i16:
case v2i32:
case v1i64:
case v4f16:
case v2f32:
case v1f64:
case nxv8i8:
case nxv4i16:
case nxv2i32:
case nxv1i64:
case nxv4f16:
case nxv2f32:
case nxv1f64: return 64;
case f80 : return 80;
case v3i32:
case v3f32: return 96;
case f128:
case ppcf128:
case i128:
case v128i1:
case v16i8:
case v8i16:
case v4i32:
case v2i64:
case v1i128:
case v8f16:
case v4f32:
case v2f64:
case nxv16i8:
case nxv8i16:
case nxv4i32:
case nxv2i64:
case nxv8f16:
case nxv4f32:
case nxv2f64: return 128;
case v5i32:
case v5f32: return 160;
case v32i8:
case v16i16:
case v8i32:
case v4i64:
case v16f16:
case v8f32:
case v4f64:
case nxv32i8:
case nxv16i16:
case nxv8i32:
case nxv4i64:
case nxv8f32:
case nxv4f64: return 256;
case v512i1:
case v64i8:
case v32i16:
case v16i32:
case v8i64:
case v32f16:
case v16f32:
case v8f64:
case nxv32i16:
case nxv16i32:
case nxv8i64:
case nxv16f32:
case nxv8f64: return 512;
case v1024i1:
case v128i8:
case v64i16:
case v32i32:
case v16i64:
case v32f32:
case nxv32i32:
case nxv16i64: return 1024;
case v256i8:
case v128i16:
case v64i32:
case v32i64:
case v64f32:
case nxv32i64: return 2048;
case v128i32:
case v128f32: return 4096;
case v256i32:
case v256f32: return 8192;
case v512i32:
case v512f32: return 16384;
case v1024i32:
case v1024f32: return 32768;
case v2048i32:
case v2048f32: return 65536;
case exnref: return 0; // opaque type
}
}
unsigned getScalarSizeInBits() const {
return getScalarType().getSizeInBits();
}
/// Return the number of bytes overwritten by a store of the specified value
/// type, i.e. the bit size rounded up to a whole number of bytes.
unsigned getStoreSize() const {
  const unsigned Bits = getSizeInBits();
  return (Bits + 7) / 8;
}
/// Return the number of bits overwritten by a store of the specified value
/// type: the store size in bytes expressed in bits.
unsigned getStoreSizeInBits() const {
  const unsigned Bytes = getStoreSize();
  return Bytes * 8;
}
/// Return true if this type's getSizeInBits() is strictly greater than VT's.
bool bitsGT(MVT VT) const {
  const unsigned ThisBits = getSizeInBits();
  const unsigned OtherBits = VT.getSizeInBits();
  return ThisBits > OtherBits;
}
/// Return true if this type's getSizeInBits() is greater than or equal to
/// VT's.
bool bitsGE(MVT VT) const {
  const unsigned ThisBits = getSizeInBits();
  const unsigned OtherBits = VT.getSizeInBits();
  return ThisBits >= OtherBits;
}
/// Return true if this type's getSizeInBits() is strictly less than VT's.
bool bitsLT(MVT VT) const {
  const unsigned ThisBits = getSizeInBits();
  const unsigned OtherBits = VT.getSizeInBits();
  return ThisBits < OtherBits;
}
/// Return true if this type's getSizeInBits() is less than or equal to VT's.
bool bitsLE(MVT VT) const {
  const unsigned ThisBits = getSizeInBits();
  const unsigned OtherBits = VT.getSizeInBits();
  return ThisBits <= OtherBits;
}
/// Map a bit width (16, 32, 64, 80 or 128) to the matching f<N> type.
/// Any other width reaches llvm_unreachable("Bad bit width!").
static MVT getFloatingPointVT(unsigned BitWidth) {
  if (BitWidth == 16)
    return MVT::f16;
  if (BitWidth == 32)
    return MVT::f32;
  if (BitWidth == 64)
    return MVT::f64;
  if (BitWidth == 80)
    return MVT::f80;
  if (BitWidth == 128)
    return MVT::f128;
  llvm_unreachable("Bad bit width!");
}
/// Map a bit width (1, 8, 16, 32, 64 or 128) to the matching i<N> type.
/// Any other width yields INVALID_SIMPLE_VALUE_TYPE instead of asserting.
static MVT getIntegerVT(unsigned BitWidth) {
  if (BitWidth == 1)
    return MVT::i1;
  if (BitWidth == 8)
    return MVT::i8;
  if (BitWidth == 16)
    return MVT::i16;
  if (BitWidth == 32)
    return MVT::i32;
  if (BitWidth == 64)
    return MVT::i64;
  if (BitWidth == 128)
    return MVT::i128;
  return static_cast<MVT::SimpleValueType>(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
/// Look up the fixed-length vector type with element type VT and NumElements
/// elements. Returns INVALID_SIMPLE_VALUE_TYPE when no such enumerator is
/// listed for that combination.
static MVT getVectorVT(MVT VT, unsigned NumElements) {
  switch (VT.SimpleTy) {
  case MVT::i1:
    switch (NumElements) {
    case 1:    return MVT::v1i1;
    case 2:    return MVT::v2i1;
    case 4:    return MVT::v4i1;
    case 8:    return MVT::v8i1;
    case 16:   return MVT::v16i1;
    case 32:   return MVT::v32i1;
    case 64:   return MVT::v64i1;
    case 128:  return MVT::v128i1;
    case 512:  return MVT::v512i1;
    case 1024: return MVT::v1024i1;
    default:   break;
    }
    break;
  case MVT::i8:
    switch (NumElements) {
    case 1:   return MVT::v1i8;
    case 2:   return MVT::v2i8;
    case 4:   return MVT::v4i8;
    case 8:   return MVT::v8i8;
    case 16:  return MVT::v16i8;
    case 32:  return MVT::v32i8;
    case 64:  return MVT::v64i8;
    case 128: return MVT::v128i8;
    case 256: return MVT::v256i8;
    default:  break;
    }
    break;
  case MVT::i16:
    switch (NumElements) {
    case 1:   return MVT::v1i16;
    case 2:   return MVT::v2i16;
    case 3:   return MVT::v3i16;
    case 4:   return MVT::v4i16;
    case 8:   return MVT::v8i16;
    case 16:  return MVT::v16i16;
    case 32:  return MVT::v32i16;
    case 64:  return MVT::v64i16;
    case 128: return MVT::v128i16;
    default:  break;
    }
    break;
  case MVT::i32:
    switch (NumElements) {
    case 1:    return MVT::v1i32;
    case 2:    return MVT::v2i32;
    case 3:    return MVT::v3i32;
    case 4:    return MVT::v4i32;
    case 5:    return MVT::v5i32;
    case 8:    return MVT::v8i32;
    case 16:   return MVT::v16i32;
    case 32:   return MVT::v32i32;
    case 64:   return MVT::v64i32;
    case 128:  return MVT::v128i32;
    case 256:  return MVT::v256i32;
    case 512:  return MVT::v512i32;
    case 1024: return MVT::v1024i32;
    case 2048: return MVT::v2048i32;
    default:   break;
    }
    break;
  case MVT::i64:
    switch (NumElements) {
    case 1:  return MVT::v1i64;
    case 2:  return MVT::v2i64;
    case 4:  return MVT::v4i64;
    case 8:  return MVT::v8i64;
    case 16: return MVT::v16i64;
    case 32: return MVT::v32i64;
    default: break;
    }
    break;
  case MVT::i128:
    // Only a single-element i128 vector is listed.
    if (NumElements == 1)
      return MVT::v1i128;
    break;
  case MVT::f16:
    // Note: there is no one-element f16 vector in this table.
    switch (NumElements) {
    case 2:  return MVT::v2f16;
    case 3:  return MVT::v3f16;
    case 4:  return MVT::v4f16;
    case 8:  return MVT::v8f16;
    case 16: return MVT::v16f16;
    case 32: return MVT::v32f16;
    default: break;
    }
    break;
  case MVT::f32:
    switch (NumElements) {
    case 1:    return MVT::v1f32;
    case 2:    return MVT::v2f32;
    case 3:    return MVT::v3f32;
    case 4:    return MVT::v4f32;
    case 5:    return MVT::v5f32;
    case 8:    return MVT::v8f32;
    case 16:   return MVT::v16f32;
    case 32:   return MVT::v32f32;
    case 64:   return MVT::v64f32;
    case 128:  return MVT::v128f32;
    case 256:  return MVT::v256f32;
    case 512:  return MVT::v512f32;
    case 1024: return MVT::v1024f32;
    case 2048: return MVT::v2048f32;
    default:   break;
    }
    break;
  case MVT::f64:
    switch (NumElements) {
    case 1:  return MVT::v1f64;
    case 2:  return MVT::v2f64;
    case 4:  return MVT::v4f64;
    case 8:  return MVT::v8f64;
    default: break;
    }
    break;
  default:
    break;
  }
  // No matching simple vector type.
  return static_cast<MVT::SimpleValueType>(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
/// Look up the nxv<NumElements><VT> scalable vector type. Returns
/// INVALID_SIMPLE_VALUE_TYPE when no such enumerator is listed for that
/// combination.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements) {
  switch (VT.SimpleTy) {
  case MVT::i1:
    switch (NumElements) {
    case 1:  return MVT::nxv1i1;
    case 2:  return MVT::nxv2i1;
    case 4:  return MVT::nxv4i1;
    case 8:  return MVT::nxv8i1;
    case 16: return MVT::nxv16i1;
    case 32: return MVT::nxv32i1;
    default: break;
    }
    break;
  case MVT::i8:
    switch (NumElements) {
    case 1:  return MVT::nxv1i8;
    case 2:  return MVT::nxv2i8;
    case 4:  return MVT::nxv4i8;
    case 8:  return MVT::nxv8i8;
    case 16: return MVT::nxv16i8;
    case 32: return MVT::nxv32i8;
    default: break;
    }
    break;
  case MVT::i16:
    switch (NumElements) {
    case 1:  return MVT::nxv1i16;
    case 2:  return MVT::nxv2i16;
    case 4:  return MVT::nxv4i16;
    case 8:  return MVT::nxv8i16;
    case 16: return MVT::nxv16i16;
    case 32: return MVT::nxv32i16;
    default: break;
    }
    break;
  case MVT::i32:
    switch (NumElements) {
    case 1:  return MVT::nxv1i32;
    case 2:  return MVT::nxv2i32;
    case 4:  return MVT::nxv4i32;
    case 8:  return MVT::nxv8i32;
    case 16: return MVT::nxv16i32;
    case 32: return MVT::nxv32i32;
    default: break;
    }
    break;
  case MVT::i64:
    switch (NumElements) {
    case 1:  return MVT::nxv1i64;
    case 2:  return MVT::nxv2i64;
    case 4:  return MVT::nxv4i64;
    case 8:  return MVT::nxv8i64;
    case 16: return MVT::nxv16i64;
    case 32: return MVT::nxv32i64;
    default: break;
    }
    break;
  case MVT::f16:
    switch (NumElements) {
    case 2:  return MVT::nxv2f16;
    case 4:  return MVT::nxv4f16;
    case 8:  return MVT::nxv8f16;
    default: break;
    }
    break;
  case MVT::f32:
    switch (NumElements) {
    case 1:  return MVT::nxv1f32;
    case 2:  return MVT::nxv2f32;
    case 4:  return MVT::nxv4f32;
    case 8:  return MVT::nxv8f32;
    case 16: return MVT::nxv16f32;
    default: break;
    }
    break;
  case MVT::f64:
    switch (NumElements) {
    case 1:  return MVT::nxv1f64;
    case 2:  return MVT::nxv2f64;
    case 4:  return MVT::nxv4f64;
    case 8:  return MVT::nxv8f64;
    default: break;
    }
    break;
  default:
    break;
  }
  // No matching simple scalable vector type.
  return static_cast<MVT::SimpleValueType>(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
/// Dispatch to getScalableVectorVT() when IsScalable is set, otherwise to the
/// fixed-length getVectorVT() lookup.
static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) {
  return IsScalable ? getScalableVectorVT(VT, NumElements)
                    : getVectorVT(VT, NumElements);
}
/// Same lookup driven by an ElementCount: EC.Min supplies the element count
/// and EC.Scalable selects the scalable or fixed-length table.
static MVT getVectorVT(MVT VT, ElementCount EC) {
  return EC.Scalable ? getScalableVectorVT(VT, EC.Min)
                     : getVectorVT(VT, EC.Min);
}
/// Return the value type corresponding to the specified type. This returns
/// all pointers as iPTR. If HandleUnknown is true, unknown types are
/// returned as Other, otherwise they are invalid.
///
/// \param Ty            the IR type to translate.
/// \param HandleUnknown selects the result for unknown types as described
///                      above; defaults to false.
static MVT getVT(Type *Ty, bool HandleUnknown = false);
private:
/// A simple iterator over the MVT::SimpleValueType enum.
struct mvt_iterator {
SimpleValueType VT;
mvt_iterator(SimpleValueType VT) : VT(VT) {}
MVT operator*() const { return VT; }
bool operator!=(const mvt_iterator &LHS) const { return VT != LHS.VT; }
mvt_iterator& operator++() {
VT = (MVT::SimpleValueType)((int)VT + 1);
assert((int)VT <= MVT::MAX_ALLOWED_VALUETYPE &&
"MVT iterator overflowed.");
return *this;
}
};
/// A range of the MVT::SimpleValueType enum.
using mvt_range = iterator_range<mvt_iterator>;
public:
/// SimpleValueType Iteration
/// @{
/// Range from FIRST_VALUETYPE up to (not including) LAST_VALUETYPE.
/// NOTE(review): unlike the sibling ranges, LAST_VALUETYPE is used as the end
/// iterator without a +1 -- presumably it is already one past the last valid
/// type; confirm against the enum definition.
static mvt_range all_valuetypes() {
  const mvt_iterator First(MVT::FIRST_VALUETYPE);
  const mvt_iterator Last(MVT::LAST_VALUETYPE);
  return mvt_range(First, Last);
}
/// Range covering FIRST_INTEGER_VALUETYPE through LAST_INTEGER_VALUETYPE
/// inclusive (the end iterator is LAST + 1).
static mvt_range integer_valuetypes() {
  const mvt_iterator First(MVT::FIRST_INTEGER_VALUETYPE);
  const mvt_iterator PastLast(
      static_cast<MVT::SimpleValueType>(MVT::LAST_INTEGER_VALUETYPE + 1));
  return mvt_range(First, PastLast);
}
/// Range covering FIRST_FP_VALUETYPE through LAST_FP_VALUETYPE inclusive
/// (the end iterator is LAST + 1).
static mvt_range fp_valuetypes() {
  const mvt_iterator First(MVT::FIRST_FP_VALUETYPE);
  const mvt_iterator PastLast(
      static_cast<MVT::SimpleValueType>(MVT::LAST_FP_VALUETYPE + 1));
  return mvt_range(First, PastLast);
}
/// Range covering FIRST_VECTOR_VALUETYPE through LAST_VECTOR_VALUETYPE
/// inclusive (the end iterator is LAST + 1).
static mvt_range vector_valuetypes() {
  const mvt_iterator First(MVT::FIRST_VECTOR_VALUETYPE);
  const mvt_iterator PastLast(
      static_cast<MVT::SimpleValueType>(MVT::LAST_VECTOR_VALUETYPE + 1));
  return mvt_range(First, PastLast);
}
- static mvt_range integer_vector_valuetypes() {
+ static mvt_range fixedlen_vector_valuetypes() {
+ return mvt_range(
+ MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE + 1));
+ }
+
+ static mvt_range scalable_vector_valuetypes() {
return mvt_range(
- MVT::FIRST_INTEGER_VECTOR_VALUETYPE,
- (MVT::SimpleValueType)(MVT::LAST_INTEGER_VECTOR_VALUETYPE + 1));
+ MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_SCALABLE_VECTOR_VALUETYPE + 1));
}
- static mvt_range fp_vector_valuetypes() {
+ static mvt_range integer_fixedlen_vector_valuetypes() {
return mvt_range(
- MVT::FIRST_FP_VECTOR_VALUETYPE,
- (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1));
+ MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE + 1));
+ }
+
+ static mvt_range fp_fixedlen_vector_valuetypes() {
+ return mvt_range(
+ MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE + 1));
}
static mvt_range integer_scalable_vector_valuetypes() {
- return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE,
- (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1));
+ return mvt_range(
+ MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE + 1));
}
static mvt_range fp_scalable_vector_valuetypes() {
- return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE,
- (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1));
+ return mvt_range(
+ MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
+ (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE + 1));
}
/// @}
};
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEVALUETYPE_H
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 7c6860eb26c3..0eb10a110421 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1,1978 +1,1979 @@
//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLoweringBase class.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
using namespace llvm;
// NOTE: the option-name strings below had lost their '-' separators (the same
// corruption that turned "(0-100)" into "(0100)" elsewhere in this file).
// They are restored to the hyphenated spellings so the flags can be passed on
// the command line under their expected names.

/// Hidden flag whose value seeds TargetLoweringBase::JumpIsExpensive in the
/// constructor.
static cl::opt<bool> JumpIsExpensiveOverride(
    "jump-is-expensive", cl::init(false),
    cl::desc("Do not create extra branches to split comparison logic."),
    cl::Hidden);

/// Hidden flag: minimum number of entries to use a jump table (default 4).
static cl::opt<unsigned> MinimumJumpTableEntries
  ("min-jump-table-entries", cl::init(4), cl::Hidden,
   cl::desc("Set minimum number of entries to use a jump table."));

/// Hidden flag: maximum size of jump tables (default UINT_MAX).
static cl::opt<unsigned> MaximumJumpTableSize
  ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden,
   cl::desc("Set maximum size of jump tables."));

/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
    JumpTableDensity("jump-table-density", cl::init(10), cl::Hidden,
                     cl::desc("Minimum density for building a jump table in "
                              "a normal function"));

/// Minimum jump table density for -Os or -Oz functions.
static cl::opt<unsigned> OptsizeJumpTableDensity(
    "optsize-jump-table-density", cl::init(40), cl::Hidden,
    cl::desc("Minimum density for building a jump table in "
             "an optsize function"));
/// Decide whether the Darwin target described by TT provides the
/// sincos_stret entry points. Must only be called with a Darwin triple.
static bool darwinHasSinCos(const Triple &TT) {
  assert(TT.isOSDarwin() && "should be called with darwin triple");

  // 32-bit x86 is never given these routines.
  if (TT.getArch() == Triple::x86)
    return false;

  if (TT.isMacOSX()) {
    // macOS older than 10.9 has no sincos_stret.
    if (TT.isMacOSXVersionLT(10, 9))
      return false;
    return TT.isArch64Bit();
  }

  if (TT.isiOS()) {
    // iOS older than 7.0 has no sincos_stret.
    const bool TooOld = TT.isOSVersionLT(7, 0);
    return !TooOld;
  }

  // Any other darwin such as WatchOS/TvOS is new enough.
  return true;
}
// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than this
// is best represented as control flow. Therefore, the default value N should be
// set such that the win from N% correct executions is greater than the loss
// from (100 - N)% mispredicted executions for the majority of intended targets.
//
// NOTE: the option name and the "(0-100)" range in the description had lost
// their '-' characters; both are restored here.
static cl::opt<int> MinPercentageForPredictableBranch(
    "min-predictable-branch", cl::init(99),
    cl::desc("Minimum percentage (0-100) that a condition must be either true "
             "or false to assume that the condition is predictable"),
    cl::Hidden);
/// Populate the libcall name and calling-convention tables for the target
/// triple TT.
///
/// Every libcall first receives the name listed in RuntimeLibcalls.def and the
/// C calling convention; the triple-specific sections below then override
/// individual entries.
///
/// NOTE: the "||" operators in the PPC and sincos conditions had been stripped
/// from this text, leaving ill-formed expressions; they are restored here.
void TargetLoweringBase::InitLibcalls(const Triple &TT) {
#define HANDLE_LIBCALL(code, name) \
  setLibcallName(RTLIB::code, name);
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
  // Initialize calling conventions to their default.
  for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
    setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);

  // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
  if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
    setLibcallName(RTLIB::ADD_F128, "__addkf3");
    setLibcallName(RTLIB::SUB_F128, "__subkf3");
    setLibcallName(RTLIB::MUL_F128, "__mulkf3");
    setLibcallName(RTLIB::DIV_F128, "__divkf3");
    setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
    setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
    setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
    setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
    setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
    setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
    setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
    setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
    setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
    setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
    setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
    setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
    setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
    setLibcallName(RTLIB::UNE_F128, "__nekf2");
    setLibcallName(RTLIB::OGE_F128, "__gekf2");
    setLibcallName(RTLIB::OLT_F128, "__ltkf2");
    setLibcallName(RTLIB::OLE_F128, "__lekf2");
    setLibcallName(RTLIB::OGT_F128, "__gtkf2");
    setLibcallName(RTLIB::UO_F128, "__unordkf2");
    setLibcallName(RTLIB::O_F128, "__unordkf2");
  }

  // A few names are different on particular architectures or environments.
  if (TT.isOSDarwin()) {
    // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
    // of the gnueabi-style __gnu_*_ieee.
    // FIXME: What about other targets?
    setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

    // Some darwins have an optimized __bzero/bzero function.
    switch (TT.getArch()) {
    case Triple::x86:
    case Triple::x86_64:
      if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
        setLibcallName(RTLIB::BZERO, "__bzero");
      break;
    case Triple::aarch64:
    case Triple::aarch64_32:
      setLibcallName(RTLIB::BZERO, "bzero");
      break;
    default:
      break;
    }

    if (darwinHasSinCos(TT)) {
      setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret");
      setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret");
      // On the watch ABI the stret variants use the AAPCS-VFP convention
      // instead of the default C convention set above.
      if (TT.isWatchABI()) {
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F32,
                              CallingConv::ARM_AAPCS_VFP);
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F64,
                              CallingConv::ARM_AAPCS_VFP);
      }
    }
  } else {
    setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
  }

  // GNU environments, Fuchsia, and Android from version 9 on get the full set
  // of sincos names.
  if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
      (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    setLibcallName(RTLIB::SINCOS_F80, "sincosl");
    setLibcallName(RTLIB::SINCOS_F128, "sincosl");
    setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
  }

  // PS4 only gets the f32/f64 sincos names.
  if (TT.isPS4CPU()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
  }

  // OpenBSD: clear the stack-protector check-fail libcall name.
  if (TT.isOSOpenBSD()) {
    setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
  }
}
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
  if (OpVT == MVT::f16)
    return RetVT == MVT::f32 ? FPEXT_F16_F32 : UNKNOWN_LIBCALL;

  if (OpVT == MVT::f32) {
    if (RetVT == MVT::f64)
      return FPEXT_F32_F64;
    if (RetVT == MVT::f128)
      return FPEXT_F32_F128;
    if (RetVT == MVT::ppcf128)
      return FPEXT_F32_PPCF128;
    return UNKNOWN_LIBCALL;
  }

  if (OpVT == MVT::f64) {
    if (RetVT == MVT::f128)
      return FPEXT_F64_F128;
    if (RetVT == MVT::ppcf128)
      return FPEXT_F64_PPCF128;
    return UNKNOWN_LIBCALL;
  }

  if (OpVT == MVT::f80)
    return RetVT == MVT::f128 ? FPEXT_F80_F128 : UNKNOWN_LIBCALL;

  return UNKNOWN_LIBCALL;
}
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
  // The table is keyed on the (narrower) result type first.
  if (RetVT == MVT::f16) {
    if (OpVT == MVT::f32)
      return FPROUND_F32_F16;
    if (OpVT == MVT::f64)
      return FPROUND_F64_F16;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F16;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F16;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F16;
    return UNKNOWN_LIBCALL;
  }

  if (RetVT == MVT::f32) {
    if (OpVT == MVT::f64)
      return FPROUND_F64_F32;
    if (OpVT == MVT::f80)
      return FPROUND_F80_F32;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F32;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F32;
    return UNKNOWN_LIBCALL;
  }

  if (RetVT == MVT::f64) {
    if (OpVT == MVT::f80)
      return FPROUND_F80_F64;
    if (OpVT == MVT::f128)
      return FPROUND_F128_F64;
    if (OpVT == MVT::ppcf128)
      return FPROUND_PPCF128_F64;
    return UNKNOWN_LIBCALL;
  }

  if (RetVT == MVT::f80)
    return OpVT == MVT::f128 ? FPROUND_F128_F80 : UNKNOWN_LIBCALL;

  return UNKNOWN_LIBCALL;
}
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
  // Choose among the i32/i64/i128 variants of one source type.
  auto SelectByResult = [&RetVT](Libcall ToI32, Libcall ToI64,
                                 Libcall ToI128) -> Libcall {
    if (RetVT == MVT::i32)
      return ToI32;
    if (RetVT == MVT::i64)
      return ToI64;
    if (RetVT == MVT::i128)
      return ToI128;
    return UNKNOWN_LIBCALL;
  };

  if (OpVT == MVT::f32)
    return SelectByResult(FPTOSINT_F32_I32, FPTOSINT_F32_I64,
                          FPTOSINT_F32_I128);
  if (OpVT == MVT::f64)
    return SelectByResult(FPTOSINT_F64_I32, FPTOSINT_F64_I64,
                          FPTOSINT_F64_I128);
  if (OpVT == MVT::f80)
    return SelectByResult(FPTOSINT_F80_I32, FPTOSINT_F80_I64,
                          FPTOSINT_F80_I128);
  if (OpVT == MVT::f128)
    return SelectByResult(FPTOSINT_F128_I32, FPTOSINT_F128_I64,
                          FPTOSINT_F128_I128);
  if (OpVT == MVT::ppcf128)
    return SelectByResult(FPTOSINT_PPCF128_I32, FPTOSINT_PPCF128_I64,
                          FPTOSINT_PPCF128_I128);
  return UNKNOWN_LIBCALL;
}
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
  // Choose among the i32/i64/i128 variants of one source type.
  auto SelectByResult = [&RetVT](Libcall ToI32, Libcall ToI64,
                                 Libcall ToI128) -> Libcall {
    if (RetVT == MVT::i32)
      return ToI32;
    if (RetVT == MVT::i64)
      return ToI64;
    if (RetVT == MVT::i128)
      return ToI128;
    return UNKNOWN_LIBCALL;
  };

  if (OpVT == MVT::f32)
    return SelectByResult(FPTOUINT_F32_I32, FPTOUINT_F32_I64,
                          FPTOUINT_F32_I128);
  if (OpVT == MVT::f64)
    return SelectByResult(FPTOUINT_F64_I32, FPTOUINT_F64_I64,
                          FPTOUINT_F64_I128);
  if (OpVT == MVT::f80)
    return SelectByResult(FPTOUINT_F80_I32, FPTOUINT_F80_I64,
                          FPTOUINT_F80_I128);
  if (OpVT == MVT::f128)
    return SelectByResult(FPTOUINT_F128_I32, FPTOUINT_F128_I64,
                          FPTOUINT_F128_I128);
  if (OpVT == MVT::ppcf128)
    return SelectByResult(FPTOUINT_PPCF128_I32, FPTOUINT_PPCF128_I64,
                          FPTOUINT_PPCF128_I128);
  return UNKNOWN_LIBCALL;
}
/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
  // Choose among the f32/f64/f80/f128/ppcf128 variants of one source type.
  auto SelectByResult = [&RetVT](Libcall ToF32, Libcall ToF64, Libcall ToF80,
                                 Libcall ToF128,
                                 Libcall ToPPCF128) -> Libcall {
    if (RetVT == MVT::f32)
      return ToF32;
    if (RetVT == MVT::f64)
      return ToF64;
    if (RetVT == MVT::f80)
      return ToF80;
    if (RetVT == MVT::f128)
      return ToF128;
    if (RetVT == MVT::ppcf128)
      return ToPPCF128;
    return UNKNOWN_LIBCALL;
  };

  if (OpVT == MVT::i32)
    return SelectByResult(SINTTOFP_I32_F32, SINTTOFP_I32_F64, SINTTOFP_I32_F80,
                          SINTTOFP_I32_F128, SINTTOFP_I32_PPCF128);
  if (OpVT == MVT::i64)
    return SelectByResult(SINTTOFP_I64_F32, SINTTOFP_I64_F64, SINTTOFP_I64_F80,
                          SINTTOFP_I64_F128, SINTTOFP_I64_PPCF128);
  if (OpVT == MVT::i128)
    return SelectByResult(SINTTOFP_I128_F32, SINTTOFP_I128_F64,
                          SINTTOFP_I128_F80, SINTTOFP_I128_F128,
                          SINTTOFP_I128_PPCF128);
  return UNKNOWN_LIBCALL;
}
/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
  // Choose among the f32/f64/f80/f128/ppcf128 variants of one source type.
  auto SelectByResult = [&RetVT](Libcall ToF32, Libcall ToF64, Libcall ToF80,
                                 Libcall ToF128,
                                 Libcall ToPPCF128) -> Libcall {
    if (RetVT == MVT::f32)
      return ToF32;
    if (RetVT == MVT::f64)
      return ToF64;
    if (RetVT == MVT::f80)
      return ToF80;
    if (RetVT == MVT::f128)
      return ToF128;
    if (RetVT == MVT::ppcf128)
      return ToPPCF128;
    return UNKNOWN_LIBCALL;
  };

  if (OpVT == MVT::i32)
    return SelectByResult(UINTTOFP_I32_F32, UINTTOFP_I32_F64, UINTTOFP_I32_F80,
                          UINTTOFP_I32_F128, UINTTOFP_I32_PPCF128);
  if (OpVT == MVT::i64)
    return SelectByResult(UINTTOFP_I64_F32, UINTTOFP_I64_F64, UINTTOFP_I64_F80,
                          UINTTOFP_I64_F128, UINTTOFP_I64_PPCF128);
  if (OpVT == MVT::i128)
    return SelectByResult(UINTTOFP_I128_F32, UINTTOFP_I128_F64,
                          UINTTOFP_I128_F80, UINTTOFP_I128_F128,
                          UINTTOFP_I128_PPCF128);
  return UNKNOWN_LIBCALL;
}
/// Return the SYNC_* libcall for atomic opcode Opc operating on integer type
/// VT, or UNKNOWN_LIBCALL when either the opcode or the type is not handled.
///
/// The _1/_2/_4/_8/_16 enumerator suffix is selected from i8/i16/i32/i64/i128
/// respectively.
RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
// OP_TO_LIBCALL expands to one outer `case Name:` whose body is a complete
// inner switch over VT.SimpleTy. Every path of that inner switch returns, so
// control never falls through from one opcode case into the next.
#define OP_TO_LIBCALL(Name, Enum) \
case Name: \
switch (VT.SimpleTy) { \
default: \
return UNKNOWN_LIBCALL; \
case MVT::i8: \
return Enum##_1; \
case MVT::i16: \
return Enum##_2; \
case MVT::i32: \
return Enum##_4; \
case MVT::i64: \
return Enum##_8; \
case MVT::i128: \
return Enum##_16; \
}
switch (Opc) {
OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
}
#undef OP_TO_LIBCALL
// Opc matched none of the atomic opcodes above.
return UNKNOWN_LIBCALL;
}
/// Return the MEMCPY_ELEMENT_UNORDERED_ATOMIC_* libcall for an element size
/// of 1, 2, 4, 8 or 16, or UNKNOWN_LIBCALL for any other size.
RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  if (ElementSize == 1)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
  if (ElementSize == 2)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
  if (ElementSize == 4)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
  if (ElementSize == 8)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
  if (ElementSize == 16)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
  return UNKNOWN_LIBCALL;
}
/// Return the MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* libcall for an element size
/// of 1, 2, 4, 8 or 16, or UNKNOWN_LIBCALL for any other size.
RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  if (ElementSize == 1)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
  if (ElementSize == 2)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
  if (ElementSize == 4)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
  if (ElementSize == 8)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
  if (ElementSize == 16)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
  return UNKNOWN_LIBCALL;
}
/// Return the MEMSET_ELEMENT_UNORDERED_ATOMIC_* libcall for an element size
/// of 1, 2, 4, 8 or 16, or UNKNOWN_LIBCALL for any other size.
RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  if (ElementSize == 1)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
  if (ElementSize == 2)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
  if (ElementSize == 4)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
  if (ElementSize == 8)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
  if (ElementSize == 16)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
  return UNKNOWN_LIBCALL;
}
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
/// \param CCs array with at least RTLIB::UNKNOWN_LIBCALL entries. Every entry
///            is first set to ISD::SETCC_INVALID; the floating-point
///            comparison libcalls then receive the condition code assigned
///            below.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
  // Fill element-wise. memset() would replicate the low byte of SETCC_INVALID
  // into every byte of each element, which only equals SETCC_INVALID when
  // CondCode is one byte wide.
  std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);

  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
  CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
  CCs[RTLIB::UNE_F32] = ISD::SETNE;
  CCs[RTLIB::UNE_F64] = ISD::SETNE;
  CCs[RTLIB::UNE_F128] = ISD::SETNE;
  CCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
  CCs[RTLIB::OGE_F32] = ISD::SETGE;
  CCs[RTLIB::OGE_F64] = ISD::SETGE;
  CCs[RTLIB::OGE_F128] = ISD::SETGE;
  CCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
  CCs[RTLIB::OLT_F32] = ISD::SETLT;
  CCs[RTLIB::OLT_F64] = ISD::SETLT;
  CCs[RTLIB::OLT_F128] = ISD::SETLT;
  CCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
  CCs[RTLIB::OLE_F32] = ISD::SETLE;
  CCs[RTLIB::OLE_F64] = ISD::SETLE;
  CCs[RTLIB::OLE_F128] = ISD::SETLE;
  CCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
  CCs[RTLIB::OGT_F32] = ISD::SETGT;
  CCs[RTLIB::OGT_F64] = ISD::SETGT;
  CCs[RTLIB::OGT_F128] = ISD::SETGT;
  CCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
  CCs[RTLIB::UO_F32] = ISD::SETNE;
  CCs[RTLIB::UO_F64] = ISD::SETNE;
  CCs[RTLIB::UO_F128] = ISD::SETNE;
  CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
  CCs[RTLIB::O_F32] = ISD::SETEQ;
  CCs[RTLIB::O_F64] = ISD::SETEQ;
  CCs[RTLIB::O_F128] = ISD::SETEQ;
  CCs[RTLIB::O_PPCF128] = ISD::SETEQ;
}
/// Construct the lowering base for target machine tm and install the default
/// value of every tunable before the libcall tables are populated.
/// NOTE: The TargetMachine owns TLOF.
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
  initActions();

  // Perform these initializations only once.

  // Inline-expansion limits for the memory intrinsics.
  MaxLoadsPerMemcmp = 8;
  MaxStoresPerMemmove = 8;
  MaxStoresPerMemcpy = 8;
  MaxStoresPerMemset = 8;
  MaxGluedStoresPerMemcpy = 0;
  MaxLoadsPerMemcmpOptSize = 4;
  MaxStoresPerMemmoveOptSize = 4;
  MaxStoresPerMemcpyOptSize = 4;
  MaxStoresPerMemsetOptSize = 4;

  // Boolean feature switches.
  UseUnderscoreSetJmp = false;
  UseUnderscoreLongJmp = false;
  HasMultipleConditionRegisters = false;
  HasExtractBitsInsn = false;
  JumpIsExpensive = JumpIsExpensiveOverride;
  PredictableSelectIsExpensive = false;
  EnableExtLdPromotion = false;

  StackPointerRegisterToSaveRestore = 0;

  BooleanContents = UndefinedBooleanContent;
  BooleanFloatContents = UndefinedBooleanContent;
  BooleanVectorContents = UndefinedBooleanContent;

  SchedPreferenceInfo = Sched::ILP;
  GatherAllAliasesMaxDepth = 18;

  // TODO: the default will be switched to 0 in the next commit, along
  // with the Target-specific changes necessary.
  MaxAtomicSizeInBitsSupported = 1024;
  MinCmpXchgSizeInBits = 0;
  SupportsUnalignedAtomics = false;

  // Start with no libcall names, then install the per-triple names and the
  // default comparison condition codes.
  std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames),
            nullptr);
  InitLibcalls(TM.getTargetTriple());
  InitCmpLibcallCCs(CmpLibcallCCs);
}
void TargetLoweringBase::initActions() {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));
std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);
for (MVT VT : MVT::fp_valuetypes()) {
MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
if (IntVT.isValid()) {
setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
}
}
// Set default actions for various operations.
for (MVT VT : MVT::all_valuetypes()) {
// Default all indexed load / store to expand.
for (unsigned IM = (unsigned)ISD::PRE_INC;
IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
setIndexedLoadAction(IM, VT, Expand);
setIndexedStoreAction(IM, VT, Expand);
}
// Most backends expect to see the node which just returns the value loaded.
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
setOperationAction(ISD::FMINIMUM, VT, Expand);
setOperationAction(ISD::FMAXIMUM, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
setOperationAction(ISD::ABS, VT, Expand);
setOperationAction(ISD::FSHL, VT, Expand);
setOperationAction(ISD::FSHR, VT, Expand);
setOperationAction(ISD::SADDSAT, VT, Expand);
setOperationAction(ISD::UADDSAT, VT, Expand);
setOperationAction(ISD::SSUBSAT, VT, Expand);
setOperationAction(ISD::USUBSAT, VT, Expand);
setOperationAction(ISD::SMULFIX, VT, Expand);
setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
setOperationAction(ISD::UMULFIXSAT, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
setOperationAction(ISD::SSUBO, VT, Expand);
setOperationAction(ISD::UADDO, VT, Expand);
setOperationAction(ISD::USUBO, VT, Expand);
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
// ADDCARRY operations default to expand
setOperationAction(ISD::ADDCARRY, VT, Expand);
setOperationAction(ISD::SUBCARRY, VT, Expand);
setOperationAction(ISD::SETCCCARRY, VT, Expand);
// ADDC/ADDE/SUBC/SUBE default to expand.
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::ADDE, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::SUBE, VT, Expand);
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::BITREVERSE, VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
// These operations default to expand for vector types.
if (VT.isVector()) {
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
// Constrained floatingpoint operations default to expand.
setOperationAction(ISD::STRICT_FADD, VT, Expand);
setOperationAction(ISD::STRICT_FSUB, VT, Expand);
setOperationAction(ISD::STRICT_FMUL, VT, Expand);
setOperationAction(ISD::STRICT_FDIV, VT, Expand);
setOperationAction(ISD::STRICT_FREM, VT, Expand);
setOperationAction(ISD::STRICT_FMA, VT, Expand);
setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
setOperationAction(ISD::STRICT_FPOW, VT, Expand);
setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
setOperationAction(ISD::STRICT_FSIN, VT, Expand);
setOperationAction(ISD::STRICT_FCOS, VT, Expand);
setOperationAction(ISD::STRICT_FEXP, VT, Expand);
setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
setOperationAction(ISD::STRICT_FLOG, VT, Expand);
setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
setOperationAction(ISD::STRICT_FRINT, VT, Expand);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
setOperationAction(ISD::STRICT_FROUND, VT, Expand);
setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand);
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand);
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
// Vector reduction default to expand.
setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
// Most targets also ignore the @llvm.readcyclecounter intrinsic.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
// These library functions default to expand.
for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
setOperationAction(ISD::FCBRT, VT, Expand);
setOperationAction(ISD::FLOG , VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP , VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FROUND, VT, Expand);
setOperationAction(ISD::LROUND, VT, Expand);
setOperationAction(ISD::LLROUND, VT, Expand);
setOperationAction(ISD::LRINT, VT, Expand);
setOperationAction(ISD::LLRINT, VT, Expand);
}
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
// On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
// here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
}
/// Default scalar shift-amount type: an integer as wide as a pointer in
/// address space 0. The shifted type (second parameter) is ignored here.
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
                                               EVT) const {
  const unsigned PtrBits = DL.getPointerSizeInBits(0);
  return MVT::getIntegerVT(PtrBits);
}
/// Return the type to use for the shift amount when shifting a value of type
/// \p LHSTy. Vectors shift by a vector of the same type; scalars use the
/// target's scalar shift-amount type once types are legal, and the pointer
/// type before that.
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
                                         bool LegalTypes) const {
  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
  if (LHSTy.isVector())
    return LHSTy;
  if (LegalTypes)
    return getScalarShiftAmountTy(DL, LHSTy);
  return getPointerTy(DL);
}
/// Return true if \p Op is one of the integer division/remainder opcodes
/// (SDIV, UDIV, SREM, UREM); every other opcode reports false. \p VT must be
/// a legal type.
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
  assert(isTypeLegal(VT));
  return Op == ISD::SDIV || Op == ISD::UDIV || Op == ISD::SREM ||
         Op == ISD::UREM;
}
/// Record whether jumps are expensive on this target, unless the user already
/// decided via the command-line override.
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
  // If the command-line option was specified, ignore this request.
  if (JumpIsExpensiveOverride.getNumOccurrences())
    return;
  JumpIsExpensive = isExpensive;
}
/// Compute one legalization step for \p VT: the action to apply and the type
/// that action produces. Simple types are answered from the tables filled in
/// by computeRegisterProperties(); extended integer and vector types are
/// derived here.
TargetLoweringBase::LegalizeKind
TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
  // If this is a simple type, use the ComputeRegisterProp mechanism.
  if (VT.isSimple()) {
    MVT SVT = VT.getSimpleVT();
    assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
    MVT NVT = TransformToType[SVT.SimpleTy];
    LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);

    assert((LA == TypeLegal || LA == TypeSoftenFloat ||
            (NVT.isVector() ||
             ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
           "Promote may not follow Expand or Promote");

    if (LA == TypeSplitVector)
      return LegalizeKind(LA,
                          EVT::getVectorVT(Context, SVT.getVectorElementType(),
                                           SVT.getVectorNumElements() / 2));
    if (LA == TypeScalarizeVector)
      return LegalizeKind(LA, SVT.getVectorElementType());
    return LegalizeKind(LA, NVT);
  }

  // Handle Extended Scalar Types.
  if (!VT.isVector()) {
    assert(VT.isInteger() && "Float types must be simple");
    unsigned BitSize = VT.getSizeInBits();
    // First promote to a power-of-two size, then expand if necessary.
    if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
      EVT NVT = VT.getRoundIntegerType(Context);
      assert(NVT != VT && "Unable to round integer VT");
      LegalizeKind NextStep = getTypeConversion(Context, NVT);
      // Avoid multi-step promotion.
      if (NextStep.first == TypePromoteInteger)
        return NextStep;
      // Return rounded integer type.
      return LegalizeKind(TypePromoteInteger, NVT);
    }

    return LegalizeKind(TypeExpandInteger,
                        EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
  }

  // Handle vector types.
  unsigned NumElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();

  // Vectors with only one element are always scalarized.
  if (NumElts == 1)
    return LegalizeKind(TypeScalarizeVector, EltVT);

  // Try to widen vector elements until the element type is a power of two and
  // promote it to a legal type later on, for example:
  // <3 x i8> -> <4 x i8> -> <4 x i32>
  if (EltVT.isInteger()) {
    // Vectors with a number of elements that is not a power of two are always
    // widened, for example <3 x i8> -> <4 x i8>.
    if (!VT.isPow2VectorType()) {
      NumElts = (unsigned)NextPowerOf2(NumElts);
      EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
      return LegalizeKind(TypeWidenVector, NVT);
    }

    // Examine the element type.
    LegalizeKind LK = getTypeConversion(Context, EltVT);

    // If type is to be expanded, split the vector.
    //  <4 x i140> -> <2 x i140>
    if (LK.first == TypeExpandInteger)
      return LegalizeKind(TypeSplitVector,
                          EVT::getVectorVT(Context, EltVT, NumElts / 2));

    // Promote the integer element types until a legal vector type is found
    // or until the element integer type is too big. If a legal type was not
    // found, fallback to the usual mechanism of widening/splitting the
    // vector.
    EVT OldEltVT = EltVT;
    while (true) {
      // Increase the bitwidth of the element to the next pow-of-two
      // (which is greater than 8 bits).
      EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
                  .getRoundIntegerType(Context);

      // Stop trying when getting a non-simple element type.
      // Note that vector elements may be greater than legal vector element
      // types. Example: X86 XMM registers hold 64-bit element on 32-bit
      // systems.
      if (!EltVT.isSimple())
        break;

      // Build a new vector type and check if it is legal.
      MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
      // Found a legal promoted vector type.
      if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
        return LegalizeKind(TypePromoteInteger,
                            EVT::getVectorVT(Context, EltVT, NumElts));
    }

    // Reset the type to the unexpanded type if we did not find a legal vector
    // type with a promoted vector element type.
    EltVT = OldEltVT;
  }

  // Try to widen the vector until a legal type is found.
  // If there is no wider legal type, split the vector.
  while (true) {
    // Round up to the next power of 2.
    NumElts = (unsigned)NextPowerOf2(NumElts);

    // If there is no simple vector type with this many elements then there
    // cannot be a larger legal vector type.  Note that this assumes that
    // there are no skipped intermediate vector types in the simple types.
    if (!EltVT.isSimple())
      break;
    MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
    if (LargerVector == MVT())
      break;

    // If this type is legal then widen the vector.
    if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
      return LegalizeKind(TypeWidenVector, LargerVector);
  }

  // Widen odd vectors to next power of two.
  if (!VT.isPow2VectorType()) {
    EVT NVT = VT.getPow2VectorType(Context);
    return LegalizeKind(TypeWidenVector, NVT);
  }

  // Vectors with illegal element types are expanded.
  EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2);
  return LegalizeKind(TypeSplitVector, NVT);
}
/// Break the simple vector type \p VT down into legal pieces.
///
/// \param VT               vector type to break down.
/// \param IntermediateVT   [out] type of each intermediate piece (a smaller
///                         vector, or the element type if no vector is legal).
/// \param NumIntermediates [out] number of intermediate pieces.
/// \param RegisterVT       [out] register type each piece is held in.
/// \param TLI              lowering object queried for legality.
/// \returns the number of registers needed to hold \p VT.
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          MVT &RegisterVT,
                                          TargetLoweringBase *TLI) {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  unsigned NewVTSize = NewVT.getSizeInBits();

  // Convert sizes such as i33 to i64.
  if (!isPowerOf2_32(NewVTSize))
    NewVTSize = NextPowerOf2(NewVTSize);

  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs * (NewVTSize / DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
/// isLegalRC - Return true if at least one of the value types that can be
/// represented by the specified register class is legal. The type list of
/// \p RC is scanned up to its MVT::Other terminator.
bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
                                   const TargetRegisterClass &RC) const {
  auto TypeIt = TRI.legalclasstypes_begin(RC);
  while (*TypeIt != MVT::Other) {
    if (isTypeLegal(*TypeIt))
      return true;
    ++TypeIt;
  }
  return false;
}
/// Replace/modify any TargetFrameIndex operands with a target-dependent
/// sequence of memory operands that is recognized by PrologEpilogInserter.
///
/// Each frame-index operand of \p InitialMI is expanded in turn; every
/// expansion builds a replacement instruction, inserts it before the current
/// one and erases the current one. \p MBB is returned unchanged as a pointer.
MachineBasicBlock *
TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
                                   MachineBasicBlock *MBB) const {
  MachineInstr *MI = &InitialMI;
  MachineFunction &MF = *MI->getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // We're handling multiple types of operands here:
  // PATCHPOINT MetaArgs - live-in, read only, direct
  // STATEPOINT Deopt Spill - live-through, read only, indirect
  // STATEPOINT Deopt Alloca - live-through, read only, direct
  // (We're currently conservative and mark the deopt slots read/write in
  // practice.)
  // STATEPOINT GC Spill - live-through, read/write, indirect
  // STATEPOINT GC Alloca - live-through, read/write, direct
  // The live-in vs live-through is handled already (the live through ones are
  // all stack slots), but we need to handle the different type of stackmap
  // operands and memory effects here.

  // MI changes inside this loop as we grow operands.
  for (unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
    MachineOperand &MO = MI->getOperand(OperIdx);
    if (!MO.isFI())
      continue;

    // foldMemoryOperand builds a new MI after replacing a single FI operand
    // with the canonical set of five x86 addressing-mode operands.
    int FI = MO.getIndex();
    MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());

    // Copy operands before the frame-index.
    for (unsigned i = 0; i < OperIdx; ++i)
      MIB.add(MI->getOperand(i));
    // Add frame index operands recognized by stackmaps.cpp
    if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
      // indirect-mem-ref tag, size, #FI, offset.
      // Used for spills inserted by StatepointLowering.  This codepath is not
      // used for patchpoints/stackmaps at all, for these spilling is done via
      // foldMemoryOperand callback only.
      assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
      MIB.addImm(StackMaps::IndirectMemRefOp);
      MIB.addImm(MFI.getObjectSize(FI));
      MIB.add(MI->getOperand(OperIdx));
      MIB.addImm(0);
    } else {
      // direct-mem-ref tag, #FI, offset.
      // Used by patchpoint, and direct alloca arguments to statepoints
      MIB.addImm(StackMaps::DirectMemRefOp);
      MIB.add(MI->getOperand(OperIdx));
      MIB.addImm(0);
    }
    // Copy the operands after the frame index.
    for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
      MIB.add(MI->getOperand(i));

    // Inherit previous memory operands.
    MIB.cloneMemRefs(*MI);
    assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");

    // Add a new memory operand for this FI.
    assert(MFI.getObjectOffset(FI) != -1);

    // Note: STATEPOINT MMOs are added during SelectionDAG.  STACKMAP, and
    // PATCHPOINT should be updated to do the same. (TODO)
    if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
      auto Flags = MachineMemOperand::MOLoad;
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FI), Flags,
          MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
      MIB->addMemOperand(MF, MMO);
    }

    // Replace the instruction and update the operand index: skip past the
    // operands that were added in place of the single frame-index operand.
    MBB->insert(MachineBasicBlock::iterator(MI), MIB);
    OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1;
    MI->eraseFromParent();
    MI = MIB;
  }
  return MBB;
}
/// Re-emit a PATCHABLE_EVENT_CALL: build a new instruction with the same
/// descriptor, debug location and operands, insert it before \p MI and erase
/// \p MI. Returns \p MBB.
MachineBasicBlock *
TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const {
  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL &&
         "Called emitXRayCustomEvent on the wrong MI!");
  auto &MF = *MI.getMF();
  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
    MIB.add(MI.getOperand(OpIdx));

  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
  MI.eraseFromParent();
  return MBB;
}
/// Re-emit a PATCHABLE_TYPED_EVENT_CALL: build a new instruction with the
/// same descriptor, debug location and operands, insert it before \p MI and
/// erase \p MI. Returns \p MBB.
MachineBasicBlock *
TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const {
  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL &&
         "Called emitXRayTypedEvent on the wrong MI!");
  auto &MF = *MI.getMF();
  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
    MIB.add(MI.getOperand(OpIdx));

  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
  MI.eraseFromParent();
  return MBB;
}
/// findRepresentativeClass  Return the largest legal superreg register class
/// of the register class for the specified type and its associated "cost".
// This function is in TargetLowering because it uses RegClassForVT which would
// need to be moved to TargetRegisterInfo and would necessitate moving
// isTypeLegal over as well  a massive change that would just require
// TargetLowering having a TargetRegisterInfo class member that it would use.
std::pair<const TargetRegisterClass *, uint8_t>
TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
// Compute the set of all superregister classes.
BitVector SuperRegRC(TRI>getNumRegClasses());
for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
SuperRegRC.setBitsInMask(RCI.getMask());
// Find the first legal register class with the largest spill size.
const TargetRegisterClass *BestRC = RC;
for (unsigned i : SuperRegRC.set_bits()) {
const TargetRegisterClass *SuperRC = TRI>getRegClass(i);
// We want the largest possible spill size.
if (TRI>getSpillSize(*SuperRC) <= TRI>getSpillSize(*BestRC))
continue;
if (!isLegalRC(*TRI, *SuperRC))
continue;
BestRC = SuperRC;
}
return std::make_pair(BestRC, 1);
}
/// computeRegisterProperties  Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties(
const TargetRegisterInfo *TRI) {
static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
"Too many value types for ValueTypeActions to hold!");
// Everything defaults to needing one register.
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
NumRegistersForVT[i] = 1;
RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
}
// ...except isVoid, which doesn't need any registers.
NumRegistersForVT[MVT::isVoid] = 0;
// Find the largest integer register class.
unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
for (; RegClassForVT[LargestIntReg] == nullptr; LargestIntReg)
assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
// Every integer value type larger than this largest register takes twice as
// many registers to represent as the previous ValueType.
for (unsigned ExpandedReg = LargestIntReg + 1;
ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg1];
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg  1);
ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
TypeExpandInteger);
}
// Inspect all of the ValueType's smaller than the largest integer
// register to see which ones need promotion.
unsigned LegalIntReg = LargestIntReg;
for (unsigned IntReg = LargestIntReg  1;
IntReg >= (unsigned)MVT::i1; IntReg) {
MVT IVT = (MVT::SimpleValueType)IntReg;
if (isTypeLegal(IVT)) {
LegalIntReg = IntReg;
} else {
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
(MVT::SimpleValueType)LegalIntReg;
ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
}
}
// ppcf128 type is really two f64's.
if (!isTypeLegal(MVT::ppcf128)) {
if (isTypeLegal(MVT::f64)) {
NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
TransformToType[MVT::ppcf128] = MVT::f64;
ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
} else {
NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
TransformToType[MVT::ppcf128] = MVT::i128;
ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
}
}
// Decide how to handle f128. If the target does not have native f128 support,
// expand it to i128 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f128)) {
NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
TransformToType[MVT::f128] = MVT::i128;
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
}
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f64)) {
NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
TransformToType[MVT::f64] = MVT::i64;
ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
}
// Decide how to handle f32. If the target does not have native f32 support,
// expand it to i32 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f32)) {
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
TransformToType[MVT::f32] = MVT::i32;
ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
// Decide how to handle f16. If the target does not have native f16 support,
// promote it to f32, because there are no f16 library calls (except for
// conversions).
if (!isTypeLegal(MVT::f16)) {
NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
TransformToType[MVT::f16] = MVT::f32;
ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
}
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType) i;
if (isTypeLegal(VT))
continue;
MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
bool IsLegalWiderType = false;
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
switch (PreferredAction) {
case TypePromoteInteger:
// Try to promote the elements of integer vectors. If no legal
// promotion was found, fall through to the widenvector method.
 for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
+ for (unsigned nVT = i + 1;
+ nVT <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
IsLegalWiderType = true;
break;
}
}
if (IsLegalWiderType)
break;
LLVM_FALLTHROUGH;
case TypeWidenVector:
if (isPowerOf2_32(NElts)) {
// Try to widen the vector.
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
if (SVT.getVectorElementType() == EltVT
&& SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
IsLegalWiderType = true;
break;
}
}
if (IsLegalWiderType)
break;
} else {
// Only widen to the next power of 2 to keep consistency with EVT.
MVT NVT = VT.getPow2VectorType();
if (isTypeLegal(NVT)) {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
RegisterTypeForVT[i] = NVT;
NumRegistersForVT[i] = 1;
break;
}
}
LLVM_FALLTHROUGH;
case TypeSplitVector:
case TypeScalarizeVector: {
MVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
NumIntermediates, RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
MVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
if (PreferredAction == TypeScalarizeVector)
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
else if (PreferredAction == TypeSplitVector)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
else
// Set type action according to the number of elements.
ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector
: TypeSplitVector);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
}
break;
}
default:
llvm_unreachable("Unknown vector legalization action!");
}
}
// Determine the 'representative' register class for each value type.
// An representative register class is the largest (meaning one which is
// not a subregister class / subreg register class) legal register class for
// a group of value types. For example, on i386, i8, i16, and i32
// representative would be GR32; while on x86_64 it's GR64.
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
const TargetRegisterClass* RRC;
uint8_t Cost;
std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
RepRegClassForVT[i] = RRC;
RepRegClassCostForVT[i] = Cost;
}
}
/// Default SETCC result type for scalar comparisons: the pointer type of
/// \p DL. Vector types have no default and must not be passed here.
EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  assert(!VT.isVector() && "No default SetCC type for vectors!");
  MVT PtrVT = getPointerTy(DL);
  return PtrVT.SimpleTy;
}
/// Return the value type produced by comparison libcalls; the default is i32.
MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
  const MVT::SimpleValueType DefaultCmpTy = MVT::i32;
  return DefaultCmpTy;
}
/// getVectorTypeBreakdown - Vector types are broken down into some number of
/// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
///
/// This method returns the number of registers needed, and the VT for each
/// register.  It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const {
  unsigned NumElts = VT.getVectorNumElements();

  // If there is a wider vector type with the same element type as this one,
  // or a promoted vector type that has the same number of elements which
  // are wider, then we should convert to that legal vector type.
  // This handles things like <2 x float> -> <4 x float> and
  // <4 x i1> -> <4 x i32>.
  LegalizeTypeAction TA = getTypeAction(Context, VT);
  if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
    EVT RegisterEVT = getTypeToTransformTo(Context, VT);
    if (isTypeLegal(RegisterEVT)) {
      IntermediateVT = RegisterEVT;
      RegisterVT = RegisterEVT.getSimpleVT();
      NumIntermediates = 1;
      return 1;
    }
  }

  // Figure out the right, legal destination reg to copy into.
  EVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !isTypeLegal(
                                   EVT::getVectorVT(Context, EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
  if (!isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  MVT DestVT = getRegisterType(Context, NewVT);
  RegisterVT = DestVT;
  unsigned NewVTSize = NewVT.getSizeInBits();

  // Convert sizes such as i33 to i64.
  if (!isPowerOf2_32(NewVTSize))
    NewVTSize = NextPowerOf2(NewVTSize);

  if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs * (NewVTSize / DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function.  This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
///
/// One ISD::OutputArg is appended to \p Outs for every register part of every
/// value in \p ReturnType; nothing is appended when the type has no values.
/// TODO: Move this out of TargetLowering.cpp.
void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
                         AttributeList attr,
                         SmallVectorImpl<ISD::OutputArg> &Outs,
                         const TargetLowering &TLI, const DataLayout &DL) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
  if (ValueVTs.empty())
    return;

  // The return attributes and the context do not change per value, so query
  // them once. SExt takes precedence over ZExt when both are present.
  LLVMContext &Ctx = ReturnType->getContext();
  const bool HasSExt =
      attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt);
  const bool HasZExt =
      !HasSExt &&
      attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt);

  // 'inreg' on function refers to return value
  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
  if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
    Flags.setInReg();

  // Propagate extension type if any
  if (HasSExt)
    Flags.setSExt();
  else if (HasZExt)
    Flags.setZExt();

  for (EVT VT : ValueVTs) {
    // FIXME: C calling convention requires the return type to be promoted to
    // at least 32-bit. But this is not necessary for non-C calling
    // conventions. The frontend should mark functions whose return values
    // require promoting with signext or zeroext attributes.
    if ((HasSExt || HasZExt) && VT.isInteger()) {
      MVT MinVT = TLI.getRegisterType(Ctx, MVT::i32);
      if (VT.bitsLT(MinVT))
        VT = MinVT;
    }

    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CC, VT);
    MVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, VT);

    for (unsigned i = 0; i < NumParts; ++i)
      Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
  }
}
/// Return the desired alignment for ByVal aggregate function arguments in the
/// caller parameter area. The value is the alignment itself, not its
/// logarithm; this default simply reports the data layout's ABI alignment of
/// \p Ty.
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
                                                   const DataLayout &DL) const {
  const unsigned ABIAlign = DL.getABITypeAlignment(Ty);
  return ABIAlign;
}
/// Decide whether a memory access of type \p VT with the given alignment is
/// allowed. When \p Fast is non-null and the access meets the ABI alignment,
/// \p *Fast is set to true; otherwise the decision (and \p Fast) is delegated
/// to allowsMisalignedMemoryAccesses.
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, EVT VT,
                                            unsigned AddrSpace,
                                            unsigned Alignment,
                                            MachineMemOperand::Flags Flags,
                                            bool *Fast) const {
  // The sufficiency of the alignment is judged against the data layout.
  // TODO: While using the data layout works in practice, a better solution
  // would be to implement this check directly (make this a virtual function).
  // For example, the ABI alignment may change based on software platform while
  // this function should only be affected by hardware implementation.
  Type *MemTy = VT.getTypeForEVT(Context);
  const bool MeetsABIAlignment = Alignment >= DL.getABITypeAlignment(MemTy);

  // Under-aligned: let the misaligned-access hook decide.
  if (!MeetsABIAlignment)
    return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
                                          Fast);

  // Assume that an access that meets the ABI-specified alignment is fast.
  if (Fast)
    *Fast = true;
  return true;
}
/// Convenience overload: unpack the address space, alignment and flags from
/// \p MMO and forward to the explicit-parameter allowsMemoryAccess overload.
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, EVT VT,
                                            const MachineMemOperand &MMO,
                                            bool *Fast) const {
  const unsigned AddrSpace = MMO.getAddrSpace();
  const unsigned Alignment = MMO.getAlignment();
  const MachineMemOperand::Flags MemFlags = MMO.getFlags();
  return allowsMemoryAccess(Context, DL, VT, AddrSpace, Alignment, MemFlags,
                            Fast);
}
/// Return the branch-probability threshold built from
/// MinPercentageForPredictableBranch, expressed as a fraction of 100.
BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
  BranchProbability Threshold(MinPercentageForPredictableBranch, 100);
  return Threshold;
}
//======//
// TargetTransformInfo Helpers
//======//
/// Translate an IR instruction opcode into the ISD opcode this table
/// associates with it. Opcodes the table does not map to an ISD node yield 0.
/// An opcode that matches none of the listed cases is treated as unreachable.
int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
  // Local mirror of the IR opcode numbering, generated from Instruction.def so
  // the switch below can name each opcode.
  enum InstructionOpcodes {
#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
#include "llvm/IR/Instruction.def"
  };

  switch (static_cast<InstructionOpcodes>(Opcode)) {
  // Opcodes for which this table reports no ISD node.
  case Ret:
  case Br:
  case Switch:
  case IndirectBr:
  case Invoke:
  case CallBr:
  case Resume:
  case Unreachable:
  case CleanupRet:
  case CatchRet:
  case CatchPad:
  case CatchSwitch:
  case CleanupPad:
  case Alloca:
  case GetElementPtr:
  case Fence:
  case AtomicCmpXchg:
  case AtomicRMW:
  case PHI:
  case Call:
  case UserOp1:
  case UserOp2:
  case VAArg:
  case LandingPad:
    return 0;

  // Unary and binary arithmetic.
  case FNeg:
    return ISD::FNEG;
  case Add:
    return ISD::ADD;
  case FAdd:
    return ISD::FADD;
  case Sub:
    return ISD::SUB;
  case FSub:
    return ISD::FSUB;
  case Mul:
    return ISD::MUL;
  case FMul:
    return ISD::FMUL;
  case UDiv:
    return ISD::UDIV;
  case SDiv:
    return ISD::SDIV;
  case FDiv:
    return ISD::FDIV;
  case URem:
    return ISD::UREM;
  case SRem:
    return ISD::SREM;
  case FRem:
    return ISD::FREM;

  // Shifts and bitwise logic.
  case Shl:
    return ISD::SHL;
  case LShr:
    return ISD::SRL;
  case AShr:
    return ISD::SRA;
  case And:
    return ISD::AND;
  case Or:
    return ISD::OR;
  case Xor:
    return ISD::XOR;

  // Memory.
  case Load:
    return ISD::LOAD;
  case Store:
    return ISD::STORE;

  // Conversions.
  case Trunc:
    return ISD::TRUNCATE;
  case ZExt:
    return ISD::ZERO_EXTEND;
  case SExt:
    return ISD::SIGN_EXTEND;
  case FPToUI:
    return ISD::FP_TO_UINT;
  case FPToSI:
    return ISD::FP_TO_SINT;
  case UIToFP:
    return ISD::UINT_TO_FP;
  case SIToFP:
    return ISD::SINT_TO_FP;
  case FPTrunc:
    return ISD::FP_ROUND;
  case FPExt:
    return ISD::FP_EXTEND;
  case PtrToInt:
  case IntToPtr:
  case BitCast:
    return ISD::BITCAST;
  case AddrSpaceCast:
    return ISD::ADDRSPACECAST;

  // Comparisons and selection.
  case ICmp:
  case FCmp:
    return ISD::SETCC;
  case Select:
    return ISD::SELECT;

  // Vector and aggregate element access.
  case ExtractElement:
    return ISD::EXTRACT_VECTOR_ELT;
  case InsertElement:
    return ISD::INSERT_VECTOR_ELT;
  case ShuffleVector:
    return ISD::VECTOR_SHUFFLE;
  case ExtractValue:
  case InsertValue:
    return ISD::MERGE_VALUES;
  }

  llvm_unreachable("Unknown instruction type encountered!");
}
std::pair<int, MVT>
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
LLVMContext &C = Ty>getContext();
EVT MTy = getValueType(DL, Ty);
int Cost = 1;
// We keep legalizing the type until we find a legal kind. We assume that
// the only operation that costs anything is the split. After splitting
// we need to handle two types.
while (true) {
LegalizeKind LK = getTypeConversion(C, MTy);
if (LK.first == TypeLegal)
return std::make_pair(Cost, MTy.getSimpleVT());
if (LK.first == TypeSplitVector  LK.first == TypeExpandInteger)
Cost *= 2;
// Do not loop with f128 type.
if (MTy == LK.second)
return std::make_pair(Cost, MTy.getSimpleVT());
// Keep legalizing the type.
MTy = LK.second;
}
}
/// Return the global variable `__safestack_unsafe_stack_ptr` of the module
/// that \p IRB is inserting into, declaring it (external linkage, no
/// initializer) if the module does not have it yet.
///
/// \p UseTLS selects whether a newly created variable is thread-local. An
/// existing variable must already agree with it and must have i8* value type;
/// otherwise a fatal error is reported.
Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
                                                              bool UseTLS) const {
  // compiler-rt provides a variable with a magic name. Targets that do not
  // link with compiler-rt may also provide such a variable.
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
  auto UnsafeStackPtr =
      dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));

  Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());

  if (!UnsafeStackPtr) {
    auto TLSModel = UseTLS ?
        GlobalValue::InitialExecTLSModel :
        GlobalValue::NotThreadLocal;
    // The global variable is not defined yet, define it ourselves.
    // We use the initial-exec TLS model because we do not support the
    // variable living anywhere other than in the main executable.
    UnsafeStackPtr = new GlobalVariable(
        *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
        UnsafeStackPtrVar, nullptr, TLSModel);
  } else {
    // The variable exists, check its type and attributes.
    if (UnsafeStackPtr->getValueType() != StackPtrTy)
      report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
    if (UseTLS != UnsafeStackPtr->isThreadLocal())
      report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
                         (UseTLS ? "" : "not ") + "be thread-local");
  }
  return UnsafeStackPtr;
}
/// Return a value giving the location of the unsafe stack pointer.
///
/// On non-Android targets this is the default thread-local global. On Android
/// a call to `__safestack_pointer_address` is emitted through \p IRB and its
/// result is returned.
Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
  if (!TM.getTargetTriple().isAndroid())
    return getDefaultSafeStackPointerLocation(IRB, true);

  // Android provides a libc function to retrieve the address of the current
  // thread's unsafe stack pointer.
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
  FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address",
                                             StackPtrTy->getPointerTo(0));
  return IRB.CreateCall(Fn);
}
//======//
// Loop Strength Reduction hooks
//======//
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
///
/// This default implementation only inspects \p AM; \p DL, \p Ty, \p AS and
/// \p I are not consulted here.
bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
                                               unsigned AS, Instruction *I) const {
  // The default implementation of this implements a conservative RISCy, r+r and
  // r+i addr mode.

  // Allows a sign-extended 16-bit immediate field.
  // NOTE(review): the bounds below are built from (1LL << 16), which is wider
  // than a signed 16-bit range; confirm which of the two is intended.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Only support r+r,
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default: // Don't allow n * r
    return false;
  }

  return true;
}
//======//
// Stack Protector
//======//
// For OpenBSD return its special guard variable (`__guard_local`, obtained or
// inserted in the module that IRB is inserting into). Otherwise return
// nullptr, so that SelectionDAG handles SSP.
Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
  if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
    Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
    PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
    return M.getOrInsertGlobal("__guard_local", PtrTy);
  }
  return nullptr;
}
// Declare the "standard" __stack_chk_guard global (i8*, external linkage, no
// initializer) in M unless a value with that name already exists.
// TODO: add LOAD_STACK_GUARD support.
void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
  const bool AlreadyPresent = M.getNamedValue("__stack_chk_guard") != nullptr;
  if (AlreadyPresent)
    return;

  auto *GuardTy = Type::getInt8PtrTy(M.getContext());
  new GlobalVariable(M, GuardTy, false, GlobalVariable::ExternalLinkage,
                     nullptr, "__stack_chk_guard");
}
// Look up the "standard" __stack_chk_guard value in M; the result is whatever
// Module::getNamedValue yields for that name.
// TODO: add LOAD_STACK_GUARD support.
Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
  Value *Guard = M.getNamedValue("__stack_chk_guard");
  return Guard;
}
/// Default implementation: reports no stack-guard check function for \p M by
/// always returning nullptr.
Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
  Function *const NoGuardCheck = nullptr;
  return NoGuardCheck;
}
/// Return the current value of MinimumJumpTableEntries.
unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
  const unsigned Entries = MinimumJumpTableEntries;
  return Entries;
}
/// Overwrite MinimumJumpTableEntries with \p Val.
// NOTE(review): MinimumJumpTableEntries is declared outside this excerpt; it
// appears to be file-level state rather than a member — confirm.
void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
MinimumJumpTableEntries = Val;
}
/// Return the jump-table density setting to use: OptsizeJumpTableDensity when
/// \p OptForSize is set, JumpTableDensity otherwise.
unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const {
  if (OptForSize)
    return OptsizeJumpTableDensity;
  return JumpTableDensity;
}
/// Return the current value of MaximumJumpTableSize.
unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
  const unsigned MaxSize = MaximumJumpTableSize;
  return MaxSize;
}
/// Overwrite MaximumJumpTableSize with \p Val.
// NOTE(review): MaximumJumpTableSize is declared outside this excerpt; it
// appears to be file-level state rather than a member — confirm.
void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
MaximumJumpTableSize = Val;
}
//======//
// Reciprocal Estimates
//======//
/// Get the reciprocal estimate attribute string for a function that will
/// override the target defaults. The string is the value of the function's
/// "reciprocal-estimates" attribute.
static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return F.getFnAttribute("reciprocal-estimates").getValueAsString();
}
/// Construct a string for the given reciprocal operation of the given type.
/// This string should match the corresponding option to the frontend's
/// "-mrecip" flag assuming those strings have been passed through in an
/// attribute string. For example, "vec-divf" for a division of a vXf32.
///
/// The name is built as: optional "vec-" prefix for vector types, then "sqrt"
/// or "div", then a size suffix ('d' for f64 elements, 'f' otherwise).
static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
  std::string Name = VT.isVector() ? "vec-" : "";

  Name += IsSqrt ? "sqrt" : "div";

  // TODO: Handle "half" or other float types?
  if (VT.getScalarType() == MVT::f64) {
    Name += "d";
  } else {
    // Anything that is not f64 is expected to be f32; this is only checked in
    // builds with assertions enabled.
    assert(VT.getScalarType() == MVT::f32 &&
           "Unexpected FP type for reciprocal estimate");
    Name += "f";
  }

  return Name;
}
/// Return the character position and value (a single numeric character) of a
/// customized refinement operation in the input string if it exists. Return
/// false if there is no customized refinement step count.
///
/// \p Position is always overwritten with the result of searching for ':'.
/// \p Value is written only when true is returned. A ':' that is not followed
/// by exactly one decimal digit is reported as a fatal error.
static bool parseRefinementStep(StringRef In, size_t &Position,
                                uint8_t &Value) {
  const char RefStepToken = ':';
  Position = In.find(RefStepToken);
  if (Position == StringRef::npos)
    return false;

  StringRef RefStepString = In.substr(Position + 1);
  // Allow exactly one numeric character for the additional refinement
  // step parameter.
  if (RefStepString.size() == 1) {
    char RefStepChar = RefStepString[0];
    if (RefStepChar >= '0' && RefStepChar <= '9') {
      // Convert the ASCII digit to its numeric value.
      Value = RefStepChar - '0';
      return true;
    }
  }
  report_fatal_error("Invalid refinement step for recip.");
}
/// For the input attribute string, return one of the ReciprocalEstimate enum
/// status values (enabled, disabled, or not specified) for this operation on
/// the specified data type.
///
/// \p Override is a comma-separated list. A single entry may be one of the
/// general settings "all", "none" or "default"; otherwise each entry is
/// matched against the operation name for \p VT (with or without its size
/// suffix), optionally prefixed by '!' to disable and optionally followed by
/// a ":N" refinement-step suffix, which is ignored here.
static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
  if (Override.empty())
    return TargetLoweringBase::ReciprocalEstimate::Unspecified;

  SmallVector<StringRef, 4> OverrideVector;
  Override.split(OverrideVector, ',');
  unsigned NumArgs = OverrideVector.size();

  // Check if "all", "none", or "default" was specified.
  if (NumArgs == 1) {
    // Look for an optional setting of the number of refinement steps needed
    // for this type of reciprocal operation.
    size_t RefPos;
    uint8_t RefSteps;
    if (parseRefinementStep(Override, RefPos, RefSteps)) {
      // Split the string for further processing.
      Override = Override.substr(0, RefPos);
    }

    // All reciprocal types are enabled.
    if (Override == "all")
      return TargetLoweringBase::ReciprocalEstimate::Enabled;

    // All reciprocal types are disabled.
    if (Override == "none")
      return TargetLoweringBase::ReciprocalEstimate::Disabled;

    // Target defaults for enablement are used.
    if (Override == "default")
      return TargetLoweringBase::ReciprocalEstimate::Unspecified;
  }

  // The attribute string may omit the size suffix ('f'/'d').
  std::string VTName = getReciprocalOpName(IsSqrt, VT);
  std::string VTNameNoSize = VTName;
  VTNameNoSize.pop_back();
  static const char DisabledPrefix = '!';

  for (StringRef RecipType : OverrideVector) {
    size_t RefPos;
    uint8_t RefSteps;
    if (parseRefinementStep(RecipType, RefPos, RefSteps))
      RecipType = RecipType.substr(0, RefPos);

    // Ignore the disablement token for string matching. An entry can be empty
    // (for example after a trailing comma), so check before indexing into it.
    bool IsDisabled = !RecipType.empty() && RecipType[0] == DisabledPrefix;
    if (IsDisabled)
      RecipType = RecipType.substr(1);

    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
      return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
                        : TargetLoweringBase::ReciprocalEstimate::Enabled;
  }

  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
}
/// For the input attribute string, return the customized refinement step count
/// for this operation on the specified data type. If the step count does not
/// exist, return the ReciprocalEstimate enum value for unspecified.
///
/// A single "all:N" or "default:N" entry applies N to every operation.
/// Otherwise the first entry that carries a ":N" suffix and names this
/// operation (with or without its size suffix) supplies the count.
static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
  if (Override.empty())
    return TargetLoweringBase::ReciprocalEstimate::Unspecified;

  SmallVector<StringRef, 4> OverrideVector;
  Override.split(OverrideVector, ',');
  unsigned NumArgs = OverrideVector.size();

  // Check if "all", "default", or "none" was specified.
  if (NumArgs == 1) {
    // Look for an optional setting of the number of refinement steps needed
    // for this type of reciprocal operation.
    size_t RefPos;
    uint8_t RefSteps;
    if (!parseRefinementStep(Override, RefPos, RefSteps))
      return TargetLoweringBase::ReciprocalEstimate::Unspecified;

    // Split the string for further processing.
    Override = Override.substr(0, RefPos);
    assert(Override != "none" &&
           "Disabled reciprocals, but specifed refinement steps?");

    // If this is a general override, return the specified number of steps.
    if (Override == "all" || Override == "default")
      return RefSteps;
  }

  // The attribute string may omit the size suffix ('f'/'d').
  std::string VTName = getReciprocalOpName(IsSqrt, VT);
  std::string VTNameNoSize = VTName;
  VTNameNoSize.pop_back();

  for (StringRef RecipType : OverrideVector) {
    size_t RefPos;
    uint8_t RefSteps;
    // Entries without a ":N" suffix carry no step count.
    if (!parseRefinementStep(RecipType, RefPos, RefSteps))
      continue;

    RecipType = RecipType.substr(0, RefPos);
    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
      return RefSteps;
  }

  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
}
/// Return the enablement status of square-root estimates for \p VT, as given
/// by the reciprocal-estimate attribute string of \p MF's function.
int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
                                                    MachineFunction &MF) const {
  StringRef Override = getRecipEstimateForFunc(MF);
  return getOpEnabled(/*IsSqrt=*/true, VT, Override);
}
/// Return the enablement status of division estimates for \p VT, as given by
/// the reciprocal-estimate attribute string of \p MF's function.
int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
                                                   MachineFunction &MF) const {
  StringRef Override = getRecipEstimateForFunc(MF);
  return getOpEnabled(/*IsSqrt=*/false, VT, Override);
}
/// Return the refinement step count requested for square-root estimates on
/// \p VT by the reciprocal-estimate attribute string of \p MF's function.
int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
                                               MachineFunction &MF) const {
  StringRef Override = getRecipEstimateForFunc(MF);
  return getOpRefinementSteps(/*IsSqrt=*/true, VT, Override);
}
/// Return the refinement step count requested for division estimates on
/// \p VT by the reciprocal-estimate attribute string of \p MF's function.
int TargetLoweringBase::getDivRefinementSteps(EVT VT,
                                              MachineFunction &MF) const {
  StringRef Override = getRecipEstimateForFunc(MF);
  return getOpRefinementSteps(/*IsSqrt=*/false, VT, Override);
}
/// Default finalization step: freeze the reserved registers recorded in
/// \p MF's register info.
void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
  auto &RegInfo = MF.getRegInfo();
  RegInfo.freezeReservedRegs(MF);
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 84b4e97420c7..77311c5b918b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,12359 +1,12359 @@
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation  ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "AArch64ExpandImm.h"
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
// Debug/statistics category for this file.
#define DEBUG_TYPE "aarch64-lower"

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");

// Hidden option, off by default: allow formation of SLI/SRI instructions.
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
                           cl::desc("Allow AArch64 SLI/SRI formation"),
                           cl::init(false));

// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
    "aarch64-elf-ldtls-generation", cl::Hidden,
    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::init(false));

// Hidden option, on by default: optimize logical-immediate instructions.
static cl::opt<bool>
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
                         cl::desc("Enable AArch64 logical imm instruction "
                                  "optimization"),
                         cl::init(true));

/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
// When comparing vectors the result sets the different elements in the
// vector to allone or allzero.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
if (Subtarget>hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
if (Subtarget>hasNEON()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
}
if (Subtarget>hasSVE()) {
// Add legal sve predicate types
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
// Add legal sve data types
addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv1f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
}
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget>getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f80, Expand);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Custom lowering hooks are needed for XOR
// to fold it into CSINC/CSINV.
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FADD, MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, Custom);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FMUL, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, Custom);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// Lowering for many of the conversions is actually specified by the nonf128
// type. The LowerXXX function will be trivial when f128 isn't involved.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Variablesized objects.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget>isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
// BlockAddress
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
// Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
// AArch64 lacks both leftrotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
 for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// AArch64 doesn't have {US}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
 for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Custom lower Add/Sub/Mul with overflow.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
if (Subtarget>hasFullFP16())
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::v4f16, Expand);
setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
if (!Subtarget>hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
setOperationAction(ISD::SETCC, MVT::f16, Promote);
setOperationAction(ISD::BR_CC, MVT::f16, Promote);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
setOperationAction(ISD::FDIV, MVT::f16, Promote);
setOperationAction(ISD::FMA, MVT::f16, Promote);
setOperationAction(ISD::FNEG, MVT::f16, Promote);
setOperationAction(ISD::FABS, MVT::f16, Promote);
setOperationAction(ISD::FCEIL, MVT::f16, Promote);
setOperationAction(ISD::FSQRT, MVT::f16, Promote);
setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
setOperationAction(ISD::FABS, MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
setOperationAction(ISD::FMA, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
}
// AArch64 has implementations of a lot of roundinglike FP operations.
for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
setOperationAction(ISD::FMAXIMUM, Ty, Legal);
setOperationAction(ISD::LROUND, Ty, Legal);
setOperationAction(ISD::LLROUND, Ty, Legal);
setOperationAction(ISD::LRINT, Ty, Legal);
setOperationAction(ISD::LLRINT, Ty, Legal);
}
if (Subtarget>hasFullFP16()) {
setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
setOperationAction(ISD::FCEIL, MVT::f16, Legal);
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget>hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
} else {
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
// Make floatingpoint constants legal for the large code model, so they don't
// become loads from the constant pool.
if (Subtarget>isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
}
// AArch64 does not have floatingpoint extending loads, i1 signextending
// load, floatingpoint truncating stores, or v2i32>v2i16 truncating store.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f80, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
setIndexedStoreAction(im, MVT::f16, Legal);
}
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
if (Subtarget>isTargetWindows())
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
// Vector add and sub nodes may conceal a highhalf opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
if (Subtarget>supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::GlobalAddress);
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset = Subtarget>requiresStrictAlign()
? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemcpy = Subtarget>requiresStrictAlign()
? MaxStoresPerMemcpyOptSize : 16;
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp = Subtarget>requiresStrictAlign()
? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
EnableExtLdPromotion = true;
// Set required alignment.
setMinFunctionAlignment(llvm::Align(4));
// Set preferred alignments.
setPrefLoopAlignment(llvm::Align(1ULL << STI.getPrefLoopLogAlignment()));
setPrefFunctionAlignment(
llvm::Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if specified by
// the sub target, but not at the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget>hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
setOperationAction(ISD::FABS, MVT::v1f64, Expand);
setOperationAction(ISD::FADD, MVT::v1f64, Expand);
setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
setOperationAction(ISD::FMA, MVT::v1f64, Expand);
setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// AArch64 doesn't have a direct vector >f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
// i8 vector elements also need promotion to i32 for v8i8
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
// Similarly, there is no direct i32 > f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
// Or, direct i32 > f16 vector conversion. Set it so custom, so the
// conversion happens in two steps: v4i32 > v4f32 > v4f16
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
if (Subtarget>hasFullFP16()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
} else {
// when AArch64 doesn't have fullfp16 support, promote the input
// to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quadvector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Vector reductions
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
}
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
 for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8  VT == MVT::v8i16  VT == MVT::v4i32) {
setOperationAction(ISD::MULHS, VT, Legal);
setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
}
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
 for (MVT InnerVT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// AArch64 has implementations of a lot of roundinglike FP operations.
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
}
if (Subtarget>hasFullFP16()) {
for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
}
}
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
}
PredictableSelectIsExpensive = Subtarget>predictableSelectIsExpensive();
}
/// Set up the operation, load/store and indexed-access actions for the NEON
/// vector type \p VT.
///
/// \param VT                 the vector type being configured; must satisfy
///                           VT.isVector().
/// \param PromotedBitwiseVT  accepted for interface compatibility; it is not
///                           referenced anywhere in this body.
void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
  assert(VT.isVector() && "VT should be a vector type");

  // Floating-point vectors are loaded and stored through the integer vector
  // type with the same element layout.
  if (VT.isFloatingPoint()) {
    MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
    setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
    setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
  }

  // Mark vector float intrinsics as expand.
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FLOG, VT, Expand);
    setOperationAction(ISD::FLOG2, VT, Expand);
    setOperationAction(ISD::FLOG10, VT, Expand);
    setOperationAction(ISD::FEXP, VT, Expand);
    setOperationAction(ISD::FEXP2, VT, Expand);

    // But we do support custom-lowering for FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, VT, Custom);
  }

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
  setOperationAction(ISD::SRA, VT, Custom);
  setOperationAction(ISD::SRL, VT, Custom);
  setOperationAction(ISD::SHL, VT, Custom);
  setOperationAction(ISD::OR, VT, Custom);
  setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

  setOperationAction(ISD::SELECT, VT, Expand);
  setOperationAction(ISD::SELECT_CC, VT, Expand);
  setOperationAction(ISD::VSELECT, VT, Expand);
  for (MVT InnerVT : MVT::all_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);

  // CNT supports only B element sizes, then use UADDLP to widen.
  if (VT != MVT::v8i8 && VT != MVT::v16i8)
    setOperationAction(ISD::CTPOP, VT, Custom);

  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);

  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
  setOperationAction(ISD::FP_TO_UINT, VT, Custom);

  if (!VT.isFloatingPoint())
    setOperationAction(ISD::ABS, VT, Legal);

  // [SU][MIN|MAX] are available for all NEON types apart from i64.
  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      setOperationAction(Opcode, VT, Legal);

  // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
  if (VT.isFloatingPoint() &&
      (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
    for (unsigned Opcode :
         {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
      setOperationAction(Opcode, VT, Legal);

  // Indexed loads/stores of VT are only marked legal on little-endian
  // subtargets.
  if (Subtarget->isLittleEndian()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, VT, Legal);
      setIndexedStoreAction(im, VT, Legal);
    }
  }
}
/// Register \p VT in the AArch64::FPR64 register class and apply the common
/// NEON type setup, passing MVT::v2i32 as the promoted bitwise type.
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
  const auto *RegClass = &AArch64::FPR64RegClass;
  addRegisterClass(VT, RegClass);
  addTypeForNEON(VT, /*PromotedBitwiseVT=*/MVT::v2i32);
}
/// Register \p VT in the AArch64::FPR128 register class and apply the common
/// NEON type setup, passing MVT::v4i32 as the promoted bitwise type.
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
  const auto *RegClass = &AArch64::FPR128RegClass;
  addRegisterClass(VT, RegClass);
  addTypeForNEON(VT, /*PromotedBitwiseVT=*/MVT::v4i32);
}
/// Return the value type produced by a SETCC whose operands have type \p VT.
///
/// Vector comparisons yield VT.changeVectorElementTypeToInteger(); every
/// non-vector comparison yields MVT::i32. The DataLayout and LLVMContext
/// arguments are unused.
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                              EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();
  return MVT::i32;
}
/// Try to rewrite the immediate operand of the logical operation \p Op so that
/// it becomes encodable as an AArch64 logical immediate, changing only bits
/// that are not set in \p Demanded.
///
/// \param Op        the AND/OR/XOR node whose second operand is \p Imm.
/// \param Size      width of the operation in bits (the caller asserts 32 or
///                  64).
/// \param Imm       the current immediate value.
/// \param Demanded  the bits of the result that are actually used.
/// \param TLO       combine context used to build and install the new node.
/// \param NewOpc    machine opcode to use when the new immediate is neither
///                  all-zeros nor all-ones.
/// \returns false if the immediate is left untouched; otherwise the result of
///          TLO.CombineTo(Op, New).
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
                               const APInt &Demanded,
                               TargetLowering::TargetLoweringOpt &TLO,
                               unsigned NewOpc) {
  uint64_t OldImm = Imm, NewImm, Enc;
  // All-ones mask covering the low Size bits.
  uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;

  // Return if the immediate is already all zeros, all ones, a bimm32 or a
  // bimm64.
  if (Imm == 0 || Imm == Mask ||
      AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
    return false;

  unsigned EltSize = Size;
  uint64_t DemandedBits = Demanded.getZExtValue();

  // Clear bits that are not demanded.
  Imm &= DemandedBits;

  while (true) {
    // The goal here is to set the non-demanded bits in a way that minimizes
    // the number of switching between 0 and 1. In order to achieve this goal,
    // we set the non-demanded bits to the value of the preceding demanded bits.
    // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
    // non-demanded bit), we copy bit0 (1) to the least significant 'x',
    // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
    // The final result is 0b11000011.
    uint64_t NonDemandedBits = ~DemandedBits;
    uint64_t InvertedImm = ~Imm & DemandedBits;
    uint64_t RotatedImm =
        ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
        NonDemandedBits;
    uint64_t Sum = RotatedImm + NonDemandedBits;
    bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
    uint64_t Ones = (Sum + Carry) & NonDemandedBits;
    NewImm = (Imm | Ones) & Mask;

    // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
    // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
    // we halve the element size and continue the search.
    if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
      break;

    // We cannot shrink the element size any further if it is 2-bits.
    if (EltSize == 2)
      return false;

    EltSize /= 2;
    Mask >>= EltSize;
    uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;

    // Return if there is mismatch in any of the demanded bits of Imm and Hi.
    if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
      return false;

    // Merge the upper and lower halves of Imm and DemandedBits.
    Imm |= Hi;
    DemandedBits |= DemandedBitsHi;
  }

  ++NumOptimizedImms;

  // Replicate the element across the register width.
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
    EltSize *= 2;
  }

  // OldImm is only read by the asserts below; silence unused warnings in
  // builds where asserts are compiled out.
  (void)OldImm;
  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
         "demanded bits should never be altered");
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");

  // Create the new constant immediate node.
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue New;

  if (NewImm == 0 || NewImm == OrigMask) {
    // If the new constant immediate is all-zeros or all-ones, let the target
    // independent DAG combine optimize this node.
    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
                          TLO.DAG.getConstant(NewImm, DL, VT));
  } else {
    // Otherwise, create a machine node so that target independent DAG combine
    // doesn't undo this optimization.
    Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
    SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
    New = SDValue(
        TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
  }

  return TLO.CombineTo(Op, New);
}
/// Target hook that tries to replace the constant operand of a scalar
/// AND/OR/XOR with one that is cheaper to encode, given that only the
/// \p Demanded bits of the result matter.
///
/// \returns false when nothing is changed: before operations are legalized,
///          when EnableOptimizeLogicalImm is off, for vector types, when all
///          bits are demanded, for other opcodes, or when operand 1 is not a
///          constant. Otherwise returns the result of optimizeLogicalImm().
bool AArch64TargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
  // Delay this optimization to as late as possible.
  if (!TLO.LegalOps)
    return false;

  if (!EnableOptimizeLogicalImm)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  unsigned Size = VT.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "i32 or i64 is expected after legalization.");

  // Exit early if we demand all bits.
  if (Demanded.countPopulation() == Size)
    return false;

  // Pick the immediate-form machine opcode matching the operation and width.
  unsigned NewOpc;
  switch (Op.getOpcode()) {
  default:
    return false;
  case ISD::AND:
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    break;
  case ISD::OR:
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    break;
  case ISD::XOR:
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
    break;
  }

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  uint64_t Imm = C->getZExtValue();
  return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
}
/// computeKnownBitsForTargetNode  Determine which of the bits specified in
/// Mask are known to be either zero or one and return them Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case AArch64ISD::CSEL: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op>getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op>getOperand(1), Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
}
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget>isTargetILP32())
break;
// In ILP32 mode all valid pointers are in the low 4GB of the addressspace.
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op>getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN>getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
unsigned BitWidth = Known.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)>getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth  MemBits);
return;
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))>getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
// will zero extend the result, so we can mark as known zero all the
// bits larger than the element datatype. 32bit or larget doesn't need
// this as those are legal types and will be handled by isel directly.
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8  VT == MVT::v16i8) {
assert(BitWidth >= 8 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth  8);
Known.Zero = Mask;
} else if (VT == MVT::v4i16  VT == MVT::v8i16) {
assert(BitWidth >= 16 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth  16);
Known.Zero = Mask;
}
break;
} break;
}
}
}
}
/// Return the type used for scalar shift amounts. This is MVT::i64 regardless
/// of the data layout or of the type being shifted.
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                                  EVT) const {
  const MVT ShiftAmountTy = MVT::i64;
  return ShiftAmountTy;
}
/// Report whether a misaligned access of type \p VT is allowed.
///
/// \param VT     the value type being accessed.
/// \param Align  the alignment of the access.
/// \param Fast   optional out-parameter; when non-null and the access is
///               allowed, it is set to whether the access is considered fast.
/// \returns false if the subtarget requires strict alignment (in which case
///          \p Fast is not written), true otherwise. \p AddrSpace and
///          \p Flags are not consulted.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (Subtarget->requiresStrictAlign())
    return false;

  if (Fast) {
    // Some CPUs are fine with unaligned stores except for 128-bit ones.
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
            // See comments in performSTORECombine() for more details about
            // these conditions.

            // Code that uses clang vector extensions can mark that it
            // wants unaligned accesses to be treated as fast by
            // underspecifying alignment to be 1 or 2.
            Align <= 2 ||

            // Disregard v2i64. Memcpy lowering produces those and splitting
            // them regresses performance on micro-benchmarks and olden/bh.
            VT == MVT::v2i64;
  }
  return true;
}
/// Report whether a misaligned access of low-level type \p Ty is allowed.
/// This is the LLT counterpart of the EVT overload and applies the same
/// conditions.
///
/// \param Ty     the low-level type being accessed.
/// \param Align  the alignment of the access.
/// \param Fast   optional out-parameter; when non-null and the access is
///               allowed, it is set to whether the access is considered fast.
/// \returns false if the subtarget requires strict alignment (in which case
///          \p Fast is not written), true otherwise. \p AddrSpace and
///          \p Flags are not consulted.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
    LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (Subtarget->requiresStrictAlign())
    return false;

  if (Fast) {
    // Some CPUs are fine with unaligned stores except for 128-bit ones.
    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
            Ty.getSizeInBytes() != 16 ||
            // See comments in performSTORECombine() for more details about
            // these conditions.

            // Code that uses clang vector extensions can mark that it
            // wants unaligned accesses to be treated as fast by
            // underspecifying alignment to be 1 or 2.
            Align <= 2 ||

            // Disregard v2i64. Memcpy lowering produces those and splitting
            // them regresses performance on micro-benchmarks and olden/bh.
            Ty == LLT::vector(2, 64);
  }
  return true;
}
/// Create a FastISel instance by forwarding both arguments unchanged to
/// AArch64::createFastISel().
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                      const TargetLibraryInfo *libInfo) const {
  FastISel *Selector = AArch64::createFastISel(funcInfo, libInfo);
  return Selector;
}
/// Return the printable name of the AArch64-specific DAG node \p Opcode.
///
/// Each listed enumerator maps to its own qualified spelling (for example
/// "AArch64ISD::CALL"). AArch64ISD::FIRST_NUMBER and any value not listed
/// yield nullptr.
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
// Expands to a case label that returns the enumerator's spelling as a string.
#define MAKE_CASE(V)                                                           \
  case V:                                                                      \
    return #V;
  switch ((AArch64ISD::NodeType)Opcode) {
  case AArch64ISD::FIRST_NUMBER:
    break;
    MAKE_CASE(AArch64ISD::CALL)
    MAKE_CASE(AArch64ISD::ADRP)
    MAKE_CASE(AArch64ISD::ADR)
    MAKE_CASE(AArch64ISD::ADDlow)
    MAKE_CASE(AArch64ISD::LOADgot)
    MAKE_CASE(AArch64ISD::RET_FLAG)
    MAKE_CASE(AArch64ISD::BRCOND)
    MAKE_CASE(AArch64ISD::CSEL)
    MAKE_CASE(AArch64ISD::FCSEL)
    MAKE_CASE(AArch64ISD::CSINV)
    MAKE_CASE(AArch64ISD::CSNEG)
    MAKE_CASE(AArch64ISD::CSINC)
    MAKE_CASE(AArch64ISD::THREAD_POINTER)
    MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
    MAKE_CASE(AArch64ISD::ADC)
    MAKE_CASE(AArch64ISD::SBC)
    MAKE_CASE(AArch64ISD::ADDS)
    MAKE_CASE(AArch64ISD::SUBS)
    MAKE_CASE(AArch64ISD::ADCS)
    MAKE_CASE(AArch64ISD::SBCS)
    MAKE_CASE(AArch64ISD::ANDS)
    MAKE_CASE(AArch64ISD::CCMP)
    MAKE_CASE(AArch64ISD::CCMN)
    MAKE_CASE(AArch64ISD::FCCMP)
    MAKE_CASE(AArch64ISD::FCMP)
    MAKE_CASE(AArch64ISD::DUP)
    MAKE_CASE(AArch64ISD::DUPLANE8)
    MAKE_CASE(AArch64ISD::DUPLANE16)
    MAKE_CASE(AArch64ISD::DUPLANE32)
    MAKE_CASE(AArch64ISD::DUPLANE64)
    MAKE_CASE(AArch64ISD::MOVI)
    MAKE_CASE(AArch64ISD::MOVIshift)
    MAKE_CASE(AArch64ISD::MOVIedit)
    MAKE_CASE(AArch64ISD::MOVImsl)
    MAKE_CASE(AArch64ISD::FMOV)
    MAKE_CASE(AArch64ISD::MVNIshift)
    MAKE_CASE(AArch64ISD::MVNImsl)
    MAKE_CASE(AArch64ISD::BICi)
    MAKE_CASE(AArch64ISD::ORRi)
    MAKE_CASE(AArch64ISD::BSL)
    MAKE_CASE(AArch64ISD::NEG)
    MAKE_CASE(AArch64ISD::EXTR)
    MAKE_CASE(AArch64ISD::ZIP1)
    MAKE_CASE(AArch64ISD::ZIP2)
    MAKE_CASE(AArch64ISD::UZP1)
    MAKE_CASE(AArch64ISD::UZP2)
    MAKE_CASE(AArch64ISD::TRN1)
    MAKE_CASE(AArch64ISD::TRN2)
    MAKE_CASE(AArch64ISD::REV16)
    MAKE_CASE(AArch64ISD::REV32)
    MAKE_CASE(AArch64ISD::REV64)
    MAKE_CASE(AArch64ISD::EXT)
    MAKE_CASE(AArch64ISD::VSHL)
    MAKE_CASE(AArch64ISD::VLSHR)
    MAKE_CASE(AArch64ISD::VASHR)
    MAKE_CASE(AArch64ISD::CMEQ)
    MAKE_CASE(AArch64ISD::CMGE)
    MAKE_CASE(AArch64ISD::CMGT)
    MAKE_CASE(AArch64ISD::CMHI)
    MAKE_CASE(AArch64ISD::CMHS)
    MAKE_CASE(AArch64ISD::FCMEQ)
    MAKE_CASE(AArch64ISD::FCMGE)
    MAKE_CASE(AArch64ISD::FCMGT)
    MAKE_CASE(AArch64ISD::CMEQz)
    MAKE_CASE(AArch64ISD::CMGEz)
    MAKE_CASE(AArch64ISD::CMGTz)
    MAKE_CASE(AArch64ISD::CMLEz)
    MAKE_CASE(AArch64ISD::CMLTz)
    MAKE_CASE(AArch64ISD::FCMEQz)
    MAKE_CASE(AArch64ISD::FCMGEz)
    MAKE_CASE(AArch64ISD::FCMGTz)
    MAKE_CASE(AArch64ISD::FCMLEz)
    MAKE_CASE(AArch64ISD::FCMLTz)
    MAKE_CASE(AArch64ISD::SADDV)
    MAKE_CASE(AArch64ISD::UADDV)
    MAKE_CASE(AArch64ISD::SMINV)
    MAKE_CASE(AArch64ISD::UMINV)
    MAKE_CASE(AArch64ISD::SMAXV)
    MAKE_CASE(AArch64ISD::UMAXV)
    MAKE_CASE(AArch64ISD::NOT)
    MAKE_CASE(AArch64ISD::BIT)
    MAKE_CASE(AArch64ISD::CBZ)
    MAKE_CASE(AArch64ISD::CBNZ)
    MAKE_CASE(AArch64ISD::TBZ)
    MAKE_CASE(AArch64ISD::TBNZ)
    MAKE_CASE(AArch64ISD::TC_RETURN)
    MAKE_CASE(AArch64ISD::PREFETCH)
    MAKE_CASE(AArch64ISD::SITOF)
    MAKE_CASE(AArch64ISD::UITOF)
    MAKE_CASE(AArch64ISD::NVCAST)
    MAKE_CASE(AArch64ISD::SQSHL_I)
    MAKE_CASE(AArch64ISD::UQSHL_I)
    MAKE_CASE(AArch64ISD::SRSHR_I)
    MAKE_CASE(AArch64ISD::URSHR_I)
    MAKE_CASE(AArch64ISD::SQSHLU_I)
    MAKE_CASE(AArch64ISD::WrapperLarge)
    MAKE_CASE(AArch64ISD::LD2post)
    MAKE_CASE(AArch64ISD::LD3post)
    MAKE_CASE(AArch64ISD::LD4post)
    MAKE_CASE(AArch64ISD::ST2post)
    MAKE_CASE(AArch64ISD::ST3post)
    MAKE_CASE(AArch64ISD::ST4post)
    MAKE_CASE(AArch64ISD::LD1x2post)
    MAKE_CASE(AArch64ISD::LD1x3post)
    MAKE_CASE(AArch64ISD::LD1x4post)
    MAKE_CASE(AArch64ISD::ST1x2post)
    MAKE_CASE(AArch64ISD::ST1x3post)
    MAKE_CASE(AArch64ISD::ST1x4post)
    MAKE_CASE(AArch64ISD::LD1DUPpost)
    MAKE_CASE(AArch64ISD::LD2DUPpost)
    MAKE_CASE(AArch64ISD::LD3DUPpost)
    MAKE_CASE(AArch64ISD::LD4DUPpost)
    MAKE_CASE(AArch64ISD::LD1LANEpost)
    MAKE_CASE(AArch64ISD::LD2LANEpost)
    MAKE_CASE(AArch64ISD::LD3LANEpost)
    MAKE_CASE(AArch64ISD::LD4LANEpost)
    MAKE_CASE(AArch64ISD::ST2LANEpost)
    MAKE_CASE(AArch64ISD::ST3LANEpost)
    MAKE_CASE(AArch64ISD::ST4LANEpost)
    MAKE_CASE(AArch64ISD::SMULL)
    MAKE_CASE(AArch64ISD::UMULL)
    MAKE_CASE(AArch64ISD::FRECPE)
    MAKE_CASE(AArch64ISD::FRECPS)
    MAKE_CASE(AArch64ISD::FRSQRTE)
    MAKE_CASE(AArch64ISD::FRSQRTS)
    MAKE_CASE(AArch64ISD::STG)
    MAKE_CASE(AArch64ISD::STZG)
    MAKE_CASE(AArch64ISD::ST2G)
    MAKE_CASE(AArch64ISD::STZ2G)
  }
#undef MAKE_CASE
  return nullptr;
}
/// Expand the F128CSEL pseudo-instruction \p MI, located in \p MBB, into
/// explicit control flow and a PHI.
///
/// Operands read from \p MI: 0 = destination register, 1 = value if true,
/// 2 = value if false, 3 = condition code immediate, 4 = operand whose kill
/// flag says whether NZCV dies here.
///
/// \returns the newly created block that receives the remainder of \p MBB and
///          holds the PHI. \p MI is erased.
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  // We materialise the F128CSEL pseudo-instruction as some control flow and a
  // phi node:

  // OrigBB:
  //     [... previous instrs leading to comparison ...]
  //     b.ne TrueBB
  //     b EndBB
  // TrueBB:
  //     ; Fallthrough
  // EndBB:
  //     Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]

  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  // Insertion point for the new blocks: immediately after MBB.
  MachineFunction::iterator It = ++MBB->getIterator();

  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, TrueBB);
  MF->insert(It, EndBB);

  // Transfer rest of current basic-block to EndBB
  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
                MBB->end());
  EndBB->transferSuccessorsAndUpdatePHIs(MBB);

  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
  MBB->addSuccessor(TrueBB);
  MBB->addSuccessor(EndBB);

  // TrueBB falls through to the end.
  TrueBB->addSuccessor(EndBB);

  // If NZCV is still live after the pseudo, it must be live into both new
  // blocks.
  if (!NZCVKilled) {
    TrueBB->addLiveIn(AArch64::NZCV);
    EndBB->addLiveIn(AArch64::NZCV);
  }

  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
      .addReg(IfTrueReg)
      .addMBB(TrueBB)
      .addReg(IfFalseReg)
      .addMBB(MBB);

  MI.eraseFromParent();
  return EndBB;
}
/// Custom inserter hook for CATCHRET. Emits nothing: it only asserts that the
/// enclosing function does not use an asynchronous EH personality and returns
/// \p BB unchanged. \p MI is left in place.
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
             BB->getParent()->getFunction().getPersonalityFn())) &&
         "SEH does not use catchret!");
  return BB;
}
/// Custom inserter hook for CATCHPAD: the pseudo-instruction is deleted and no
/// replacement code is emitted.
///
/// \returns \p BB, unchanged apart from the removal of \p MI.
MachineBasicBlock *
AArch64TargetLowering::EmitLoweredCatchPad(MachineInstr &MI,
                                           MachineBasicBlock *BB) const {
  // Nothing to expand; just drop the pseudo from its parent block.
  MI.eraseFromParent();
  return BB;
}
/// Dispatch a pseudo-instruction that requested a custom inserter to the
/// routine that expands it.
///
/// \returns the block in which insertion should continue. Any opcode not
/// listed here is a programming error: in builds without NDEBUG the
/// instruction is dumped before the unreachable is hit.
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, BB);

  case AArch64::F128CSEL:
    return EmitF128CSEL(MI, BB);

  case AArch64::CATCHRET:
    return EmitLoweredCatchRet(MI, BB);

  case AArch64::CATCHPAD:
    return EmitLoweredCatchPad(MI, BB);

  default:
    break;
  }

  // No handler matched.
#ifndef NDEBUG
  MI.dump();
#endif
  llvm_unreachable("Unexpected instruction for custom inserter!");
}
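//===----------------------------------------------------------------------===//
// AArch64 Lowering private implementation.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//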
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
/// CC.
///
/// Only the ten integer predicates listed below are accepted; anything else
/// hits llvm_unreachable.
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
  switch (CC) {
  // Equality.
  case ISD::SETEQ:
    return AArch64CC::EQ;
  case ISD::SETNE:
    return AArch64CC::NE;

  // Signed ordering.
  case ISD::SETLT:
    return AArch64CC::LT;
  case ISD::SETLE:
    return AArch64CC::LE;
  case ISD::SETGT:
    return AArch64CC::GT;
  case ISD::SETGE:
    return AArch64CC::GE;

  // Unsigned ordering.
  case ISD::SETULT:
    return AArch64CC::LO;
  case ISD::SETULE:
    return AArch64CC::LS;
  case ISD::SETUGT:
    return AArch64CC::HI;
  case ISD::SETUGE:
    return AArch64CC::HS;

  default:
    llvm_unreachable("Unknown condition code!");
  }
}
/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
///
/// \param CC        the DAG floating-point predicate.
/// \param CondCode  receives the (first) AArch64 condition to test.
/// \param CondCode2 receives a second condition for the two predicates that
///                  need one (SETONE, SETUEQ); otherwise it is AArch64CC::AL.
///
/// Unlisted predicates hit llvm_unreachable.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
                                  AArch64CC::CondCode &CondCode,
                                  AArch64CC::CondCode &CondCode2) {
  // Default: a single condition is enough.
  CondCode2 = AArch64CC::AL;

  switch (CC) {
  // Predicates that need two conditions.
  case ISD::SETONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    return;
  case ISD::SETUEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    return;

  // Ordered / unordered tests.
  case ISD::SETO:
    CondCode = AArch64CC::VC;
    return;
  case ISD::SETUO:
    CondCode = AArch64CC::VS;
    return;

  // Equality.
  case ISD::SETEQ:
  case ISD::SETOEQ:
    CondCode = AArch64CC::EQ;
    return;
  case ISD::SETNE:
  case ISD::SETUNE:
    CondCode = AArch64CC::NE;
    return;

  // Greater-than family.
  case ISD::SETGT:
  case ISD::SETOGT:
    CondCode = AArch64CC::GT;
    return;
  case ISD::SETGE:
  case ISD::SETOGE:
    CondCode = AArch64CC::GE;
    return;
  case ISD::SETUGT:
    CondCode = AArch64CC::HI;
    return;
  case ISD::SETUGE:
    CondCode = AArch64CC::PL;
    return;

  // Less-than family.
  case ISD::SETOLT:
    CondCode = AArch64CC::MI;
    return;
  case ISD::SETOLE:
    CondCode = AArch64CC::LS;
    return;
  case ISD::SETLT:
  case ISD::SETULT:
    CondCode = AArch64CC::LT;
    return;
  case ISD::SETLE:
  case ISD::SETULE:
    CondCode = AArch64CC::LE;
    return;

  default:
    llvm_unreachable("Unknown FP condition!");
  }
}
/// Convert a DAG fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
///
/// \param CondCode2 is AArch64CC::AL unless \p CC is SETONE or SETUEQ, the two
///                  predicates that are rewritten as a conjunction here.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
                                     AArch64CC::CondCode &CondCode,
                                     AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;

  if (CC == ISD::SETONE) {
    // (a one b)
    //   == ((a olt b) || (a ogt b))
    //   == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
  } else if (CC == ISD::SETUEQ) {
    // (a ueq b)
    //   == ((a uno b) || (a oeq b))
    //   == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
  } else {
    // Every other predicate must map to a single condition in the plain
    // conversion.
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
    assert(CondCode2 == AArch64CC::AL);
  }
}
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
/// CC usable with the vector instructions. Fewer operations are available
/// without a real NZCV register, so we have to use less efficient combinations
/// to get the same effect.
///
/// \param Invert set to true when the produced condition(s) test the opposite
///               of \p CC; false otherwise.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
                                        AArch64CC::CondCode &CondCode,
                                        AArch64CC::CondCode &CondCode2,
                                        bool &Invert) {
  Invert = false;

  switch (CC) {
  case ISD::SETO:
  case ISD::SETUO:
    // Both use the same MI/GE pair; SETUO is the inverted form.
    Invert = (CC == ISD::SETUO);
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GE;
    break;

  case ISD::SETUEQ:
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    // All of the compare-mask comparisons are ordered, but we can switch
    // between the two by a double inversion. E.g. ULE == !OGT.
    Invert = true;
    changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
    break;

  default:
    // Mostly the scalar mappings work fine.
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
    break;
  }
}
/// Returns true if \p C can be used as an arithmetic immediate: either it
/// fits in the low 12 bits, or its low 12 bits are zero and it fits in 24 bits.
static bool isLegalArithImmed(uint64_t C) {
  // Matches AArch64DAGToDAGISel::SelectArithImmed().
  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
  LLVM_DEBUG(dbgs() << "Is imm " << C
                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
  return IsLegal;
}
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
//
// Returns true when Op is (sub 0, x) and CC is SETEQ or SETNE.
static bool isCMN(SDValue Op, ISD::CondCode CC) {
  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
         (CC == ISD::SETEQ || CC == ISD::SETNE);
}
/// Emit the flag-setting node for comparing \p LHS with \p RHS under \p CC.
///
/// Floating-point operands produce an FCMP (f16 operands are first extended
/// to f32 when the subtarget lacks full FP16 support). Integer operands
/// produce SUBS, ADDS (the CMN form) or ANDS (the TST form), and the value
/// returned is result #1 of that node.
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG) {
  EVT VT = LHS.getValueType();
  const bool FullFP16 =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();

  // Floating-point path.
  if (VT.isFloatingPoint()) {
    assert(VT != MVT::f128);
    const bool PromoteHalf = (VT == MVT::f16) && !FullFP16;
    if (PromoteHalf) {
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
      VT = MVT::f32;
    }
    return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
  }

  // Integer path: choose the flag-setting opcode and, where an operand is
  // folded away, substitute its inner operand(s).
  unsigned CmpOpc;
  if (isCMN(RHS, CC)) {
    // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction ?
    CmpOpc = AArch64ISD::ADDS;
    RHS = RHS.getOperand(1);
  } else if (isCMN(LHS, CC)) {
    // As we are looking for EQ/NE compares, the operands can be commuted ; can
    // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
    CmpOpc = AArch64ISD::ADDS;
    LHS = LHS.getOperand(1);
  } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
             !isUnsignedIntSetCC(CC)) {
    // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
    // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
    // of the signed comparisons.
    CmpOpc = AArch64ISD::ANDS;
    SDValue AndNode = LHS;
    RHS = AndNode.getOperand(1);
    LHS = AndNode.getOperand(0);
  } else {
    // The CMP instruction is just an alias for SUBS, and representing it as
    // SUBS means that it's possible to get CSE with subtract operations.
    // A later phase can perform the optimization of setting the destination
    // register to WZR/XZR if it ends up being unused.
    CmpOpc = AArch64ISD::SUBS;
  }

  SDValue FlagSetter =
      DAG.getNode(CmpOpc, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS);
  return FlagSetter.getValue(1);
}
/// \defgroup AArch64CCMP CMP;CCMP matching
///
/// These functions deal with the formation of CMP;CCMP;... sequences.
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
/// a comparison. They set the NZCV flags to a predefined value if their
/// predicate is false. This allows to express arbitrary conjunctions, for
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
/// expressed as:
/// cmp A
/// ccmp B, inv(CB), CA
/// check for CB flags
///
/// This naturally lets us implement chains of AND operations with SETCC
/// operands. And we can even implement some other situations by transforming
/// them:
///   - We can implement (NEG SETCC) i.e. negating a single comparison by
///     negating the flags used in a CCMP/FCCMP operation.
///   - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
///     by negating the flags we test for afterwards. i.e.
///     NEG (CMP CCMP CCMP ...) can be implemented.
///   - Note that we can only ever negate all previously processed results.
///     What we can not implement by flipping the flags to test is a negation
///     of two sub-trees (because the negation affects all sub-trees emitted so
///     far, so the 2nd sub-tree we emit would also affect the first).
/// With those tools we can implement some OR operations:
///   - (OR (SETCC A) (SETCC B)) can be implemented via:
///     NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
///   - After transforming OR to NEG/AND combinations we may be able to use NEG
///     elimination rules from earlier to implement the whole thing as a
///     CCMP/FCCMP chain.
///
/// As a complete example:
///     or (or (setCA (cmp A)) (setCB (cmp B)))
///        (and (setCC (cmp C)) (setCD (cmp D)))
/// can be reassociated to:
///     or (and (setCC (cmp C)) (setCD (cmp D)))
///        (or (setCA (cmp A)) (setCB (cmp B)))
/// can be transformed to:
///     not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
///              (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
/// which can be implemented as:
/// cmp C
/// ccmp D, inv(CD), CC
/// ccmp A, CA, inv(CD)
/// ccmp B, CB, inv(CA)
/// check for CB flags
///
/// A counterexample is "or (and A B) (and C D)" which translates to
///     not (and (not (and A B)) (not (and C D)))
/// where we can only implement 1 of the inner (not) operations, but not both!
/// @{
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
///
/// \param CC        the DAG predicate of this comparison; only consulted to
///                  decide whether the CCMN form is allowed.
/// \param CCOp      the flag-producing node that precedes this one in the
///                  chain.
/// \param Predicate the condition on \p CCOp passed as the node's condition
///                  operand.
/// \param OutCC     the condition that will later be tested on the result.
///                  The NZCV immediate is chosen to satisfy the inverse of
///                  \p OutCC.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
                                         ISD::CondCode CC, SDValue CCOp,
                                         AArch64CC::CondCode Predicate,
                                         AArch64CC::CondCode OutCC,
                                         const SDLoc &DL, SelectionDAG &DAG) {
  unsigned Opcode = 0;
  const bool FullFP16 =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();

  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if (LHS.getValueType() == MVT::f16 && !FullFP16) {
      LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
    }
    Opcode = AArch64ISD::FCCMP;
  } else if (RHS.getOpcode() == ISD::SUB) {
    SDValue SubOp0 = RHS.getOperand(0);
    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
      // See emitComparison() on why we can only do this for SETEQ and SETNE.
      Opcode = AArch64ISD::CCMN;
      RHS = RHS.getOperand(1);
    }
  }
  // Neither the FP nor the CCMN form applied.
  if (Opcode == 0)
    Opcode = AArch64ISD::CCMP;

  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
/// expressed as a conjunction. See \ref AArch64CCMP.
/// \param CanNegate Set to true if we can negate the whole subtree just by
/// changing the conditions on the SETCC tests.
/// (this means we can call emitConjunctionRec() with
/// Negate==true on this subtree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
/// \param WillNegate Is true if are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
bool &MustBeFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val>getOpcode();
if (Opcode == ISD::SETCC) {
if (Val>getOperand(0).getValueType() == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
return true;
}
// Protect against exponential runtime and stack overflow.
if (Depth > 6)
return false;
if (Opcode == ISD::AND  Opcode == ISD::OR) {
bool IsOR = Opcode == ISD::OR;
SDValue O0 = Val>getOperand(0);
SDValue O1 = Val>getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
return false;
bool CanNegateR;
bool MustBeFirstR;
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
return false;
if (MustBeFirstL && MustBeFirstR)
return false;
if (IsOR) {
// For an OR expression we need to be able to naturally negate at least
// one side or we cannot do the transformation at all.
if (!CanNegateL && !CanNegateR)
return false;
// If we the result of the OR will be negated and we can naturally negate
// the leafs, then this subtree as a whole negates naturally.
CanNegate = WillNegate && CanNegateL && CanNegateR;
// If we cannot naturally negate the whole subtree, then this must be
// emitted first.
MustBeFirst = !CanNegate;
} else {
assert(Opcode == ISD::AND && "Must be OR or AND");
// We cannot naturally negate an AND operation.
CanNegate = false;
MustBeFirst = MustBeFirstL  MustBeFirstR;
}
return true;
}
return false;
}
/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
/// Tries to transform the given i1 producing node @p Val to a series compare
/// and conditional compare operations. @returns an NZCV flags producing node
/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
/// transformation was not possible.
/// \p Negate is true if we want this sub-tree being negated just by changing
/// SETCC conditions.
/// \p CCOp is the flag-producing node emitted so far (empty when this is the
/// first comparison of the chain) and \p Predicate the condition on it that
/// the next conditional compare is predicated on.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
    AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
    AArch64CC::CondCode Predicate) {
  // We're at a tree leaf, produce a conditional comparison operation.
  unsigned Opcode = Val->getOpcode();
  if (Opcode == ISD::SETCC) {
    SDValue LHS = Val->getOperand(0);
    SDValue RHS = Val->getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
    bool isInteger = LHS.getValueType().isInteger();
    if (Negate)
      CC = getSetCCInverse(CC, isInteger);
    SDLoc DL(Val);
    // Determine OutCC and handle FP special case.
    if (isInteger) {
      OutCC = changeIntCCToAArch64CC(CC);
    } else {
      assert(LHS.getValueType().isFloatingPoint());
      AArch64CC::CondCode ExtraCC;
      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
      // Some floating point conditions can't be tested with a single condition
      // code. Construct an additional comparison in this case.
      if (ExtraCC != AArch64CC::AL) {
        SDValue ExtraCmp;
        if (!CCOp.getNode())
          ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
        else
          ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
                                               ExtraCC, DL, DAG);
        CCOp = ExtraCmp;
        Predicate = ExtraCC;
      }
    }

    // Produce a normal comparison if we are first in the chain
    if (!CCOp)
      return emitComparison(LHS, RHS, CC, DL, DAG);
    // Otherwise produce a ccmp.
    return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
                                     DAG);
  }
  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");

  bool IsOR = Opcode == ISD::OR;

  SDValue LHS = Val->getOperand(0);
  bool CanNegateL;
  bool MustBeFirstL;
  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
  assert(ValidL && "Valid conjunction/disjunction tree");
  (void)ValidL;

  SDValue RHS = Val->getOperand(1);
  bool CanNegateR;
  bool MustBeFirstR;
  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
  assert(ValidR && "Valid conjunction/disjunction tree");
  (void)ValidR;

  // Swap sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    std::swap(LHS, RHS);
    std::swap(CanNegateL, CanNegateR);
    std::swap(MustBeFirstL, MustBeFirstR);
  }

  bool NegateR;
  bool NegateAfterR;
  bool NegateL;
  bool NegateAfterAll;
  if (Opcode == ISD::OR) {
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      assert(!Negate);
      std::swap(LHS, RHS);
      NegateR = false;
      NegateAfterR = true;
    } else {
      // Negate the right sub-tree if possible, otherwise negate its result.
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    NegateL = true;
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");

    NegateL = false;
    NegateR = false;
    NegateAfterR = false;
    NegateAfterAll = false;
  }

  // Emit sub-trees. The right one is emitted first; its flags and resulting
  // condition then predicate the left one.
  AArch64CC::CondCode RHSCC;
  SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
  if (NegateAfterR)
    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
  SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
  if (NegateAfterAll)
    OutCC = AArch64CC::getInvertedCondCode(OutCC);
  return CmpL;
}
/// Emit expression as a conjunction (a series of CCMP/FCCMP ops).
/// In some cases this is even possible with OR operations in the expression.
/// See \ref AArch64CCMP.
/// \see emitConjunctionRec().
///
/// \returns the flag-producing node with \p OutCC set to the condition to
/// test, or an empty SDValue when \p Val is not a tree canEmitConjunction()
/// accepts.
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
                               AArch64CC::CondCode &OutCC) {
  // Only the yes/no answer matters at the root; the two out-flags are unused.
  bool UnusedCanNegate;
  bool UnusedMustBeFirst;
  const bool Emittable = canEmitConjunction(Val, UnusedCanNegate,
                                            UnusedMustBeFirst,
                                            /*WillNegate=*/false);
  if (Emittable)
    return emitConjunctionRec(DAG, Val, OutCC, /*Negate=*/false, SDValue(),
                              AArch64CC::AL);
  return SDValue();
}
/// @}
/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
///
/// The score is 0 when \p Op has more than one use or is not a recognised
/// pattern, 1 for a supported extend or a constant in-range shift, and 2 for
/// a constant shift by at most 4 of a supported extend.
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
  // A "supported extend" is SIGN_EXTEND_INREG or an AND with an 8-, 16- or
  // 32-bit all-ones mask.
  auto isSupportedExtend = [&](SDValue V) {
    if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
      return true;

    if (V.getOpcode() == ISD::AND)
      if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = MaskCst->getZExtValue();
        return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
      }

    return false;
  };

  if (!Op.hasOneUse())
    return 0;

  if (isSupportedExtend(Op))
    return 1;

  unsigned Opc = Op.getOpcode();
  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
    if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      uint64_t Shift = ShiftCst->getZExtValue();
      if (isSupportedExtend(Op.getOperand(0)))
        return (Shift <= 4) ? 2 : 1;
      EVT VT = Op.getValueType();
      if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
        return 1;
    }

  return 0;
}
/// Build the flag-producing node for the integer comparison
/// (\p LHS \p CC \p RHS).
///
/// Before emitting, the comparison may be rewritten: an immediate that is not
/// encodable is nudged by one (with the predicate adjusted to match), operands
/// may be swapped so a shift/extend folds into the second operand, and EQ/NE
/// against 0 or 1 may be turned into a conditional-compare chain.
///
/// \param AArch64cc receives a constant holding the AArch64 condition code to
///                  test on the returned flags.
/// \returns the flag-producing node.
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &AArch64cc, SelectionDAG &DAG,
                             const SDLoc &dl) {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    EVT VT = RHS.getValueType();
    uint64_t C = RHSC->getZExtValue();
    if (!isLegalArithImmed(C)) {
      // Constant does not fit, try adjusting it by one?
      switch (CC) {
      default:
        break;
      case ISD::SETLT:
      case ISD::SETGE:
        // NOTE(review): the i64 arm excludes 0x80000000ULL rather than the
        // 64-bit minimum; left as in the original -- confirm intent.
        if ((VT == MVT::i32 && C != 0x80000000 &&
             isLegalArithImmed((uint32_t)(C - 1))) ||
            (VT == MVT::i64 && C != 0x80000000ULL &&
             isLegalArithImmed(C - 1ULL))) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if ((VT == MVT::i32 && C != 0 &&
             isLegalArithImmed((uint32_t)(C - 1))) ||
            (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if ((VT == MVT::i32 && C != INT32_MAX &&
             isLegalArithImmed((uint32_t)(C + 1))) ||
            (VT == MVT::i64 && C != INT64_MAX &&
             isLegalArithImmed(C + 1ULL))) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if ((VT == MVT::i32 && C != UINT32_MAX &&
             isLegalArithImmed((uint32_t)(C + 1))) ||
            (VT == MVT::i64 && C != UINT64_MAX &&
             isLegalArithImmed(C + 1ULL))) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      }
    }
  }

  // Comparisons are canonicalized so that the RHS operand is simpler than the
  // LHS one, the extreme case being when RHS is an immediate. However, AArch64
  // can fold some shift+extend operations on the RHS operand, so swap the
  // operands if that can be done.
  //
  // For example:
  //    lsl     w13, w11, #1
  //    cmp     w13, w12
  // can be turned into:
  //    cmp     w12, w11, lsl #1
  if (!isa<ConstantSDNode>(RHS) ||
      !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
    SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;

    if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
      std::swap(LHS, RHS);
      CC = ISD::getSetCCSwappedOperands(CC);
    }
  }

  SDValue Cmp;
  AArch64CC::CondCode AArch64CC;
  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
    const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);

    // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
    // For the i8 operand, the largest immediate is 255, so this can be easily
    // encoded in the compare instruction. For the i16 operand, however, the
    // largest immediate cannot be encoded in the compare.
    // Therefore, use a sign extending load and cmn to avoid materializing the
    // -1 constant. For example,
    // movz w1, #65535
    // ldrh w0, [x0, #0]
    // cmp w0, w1
    // -->
    // ldrsh w0, [x0, #0]
    // cmn w0, #1
    // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
    // if and only if (sext LHS) == (sext RHS). The checks are in place to
    // ensure both the LHS and RHS are truly zero extended and to make sure the
    // transformation is profitable.
    if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
        cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
        LHS.getNode()->hasNUsesOfValue(1, 0)) {
      // Reinterpret the 16-bit constant as signed; only negative values whose
      // magnitude is an encodable immediate are worth the rewrite.
      int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
        SDValue SExt =
            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
                        DAG.getValueType(MVT::i16));
        Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
                                                   RHS.getValueType()),
                             CC, dl, DAG);
        AArch64CC = changeIntCCToAArch64CC(CC);
      }
    }

    // (setcc X, 0|1, eq|ne): try to emit X itself as a compare chain and fold
    // the outer comparison into the tested condition.
    if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
      if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
        if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
          AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
      }
    }
  }

  // Fall back to a plain comparison.
  if (!Cmp) {
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
    AArch64CC = changeIntCCToAArch64CC(CC);
  }
  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
  return Cmp;
}
/// Build the AArch64 nodes for an overflow-checking arithmetic operation
/// (SADDO/UADDO/SSUBO/USUBO/SMULO/UMULO) of type i32 or i64.
///
/// \param CC receives the condition code that signals overflow on the
///           returned flags.
/// \returns a pair of (arithmetic result, flag-producing value).
static std::pair<SDValue, SDValue>
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");
  SDValue Value, Overflow;
  SDLoc DL(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // Stays 0 for the multiplies, which build their own nodes below.
  unsigned Opc = 0;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::SADDO:
    Opc = AArch64ISD::ADDS;
    CC = AArch64CC::VS;
    break;
  case ISD::UADDO:
    Opc = AArch64ISD::ADDS;
    CC = AArch64CC::HS;
    break;
  case ISD::SSUBO:
    Opc = AArch64ISD::SUBS;
    CC = AArch64CC::VS;
    break;
  case ISD::USUBO:
    Opc = AArch64ISD::SUBS;
    CC = AArch64CC::LO;
    break;
  // Multiply needs a little bit extra work.
  case ISD::SMULO:
  case ISD::UMULO: {
    CC = AArch64CC::NE;
    bool IsSigned = Op.getOpcode() == ISD::SMULO;
    if (Op.getValueType() == MVT::i32) {
      unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      // For a 32 bit multiply with overflow check we want the instruction
      // selector to generate a widening multiply (SMADDL/UMADDL). For that we
      // need to generate the following pattern:
      // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
      LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
      RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
      SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
                                DAG.getConstant(0, DL, MVT::i64));
      // On AArch64 the upper 32 bits are always zero extended for a 32 bit
      // operation. We need to clear out the upper 32 bits, because we used a
      // widening multiply that wrote all 64 bits. In the end this should be a
      // no-op.
      Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
      if (IsSigned) {
        // The signed overflow check requires more than just a simple check for
        // any bit set in the upper 32 bits of the result. These bits could be
        // just the sign bits of a negative number. To perform the overflow
        // check we have to arithmetic shift right the 32nd bit of the result by
        // 31 bits. Then we compare the result to the upper 32 bits.
        SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
                                        DAG.getConstant(32, DL, MVT::i64));
        UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
        SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
                                        DAG.getConstant(31, DL, MVT::i64));
        // It is important that LowerBits is last, otherwise the arithmetic
        // shift will not be folded into the compare (SUBS).
        SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
        Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
                       .getValue(1);
      } else {
        // The overflow check for unsigned multiply is easy. We only need to
        // check if any of the upper 32 bits are set. This can be done with a
        // CMP (shifted register). For that we need to generate the following
        // pattern:
        // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
        SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
                                        DAG.getConstant(32, DL, MVT::i64));
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
        Overflow =
            DAG.getNode(AArch64ISD::SUBS, DL, VTs,
                        DAG.getConstant(0, DL, MVT::i64),
                        UpperBits).getValue(1);
      }
      break;
    }
    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
    // For the 64 bit multiply
    Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
    if (IsSigned) {
      SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
      SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
                                      DAG.getConstant(63, DL, MVT::i64));
      // It is important that LowerBits is last, otherwise the arithmetic
      // shift will not be folded into the compare (SUBS).
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
      Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
                     .getValue(1);
    } else {
      SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
      Overflow =
          DAG.getNode(AArch64ISD::SUBS, DL, VTs,
                      DAG.getConstant(0, DL, MVT::i64),
                      UpperBits).getValue(1);
    }
    break;
  }
  } // switch (...)

  if (Opc) {
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);

    // Emit the AArch64 operation with overflow check.
    Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
    Overflow = Value.getValue(1);
  }
  return std::make_pair(Value, Overflow);
}
/// Lower \p Op to a call of the runtime library routine \p Call.
///
/// All operands of \p Op are forwarded as call arguments and the call is
/// made with an f128 return type; the first element of the pair returned by
/// makeLibCall is handed back.
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
                                             RTLIB::Libcall Call) const {
  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
}
// Returns true if the given Op is the overflow flag result of an overflow
// intrinsic operation, i.e. result number 1 of one of the six add/sub/mul
// overflow opcodes.
static bool isOverflowIntrOpRes(SDValue Op) {
  unsigned Opc = Op.getOpcode();
  return (Op.getResNo() == 1 &&
          (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
           Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
}
/// Custom lowering for XOR. Two patterns are recognised:
///   - (xor overflow_flag, 1): the NOT is folded into an inverted condition
///     on the overflow operation.
///   - (xor x, (select_cc a, b, 0, -1, cc)) (or its commuted constant form):
///     rewritten as a CSEL between x and (xor x, -1).
///
/// \returns the replacement node, \p Op itself when no pattern applies, or an
/// empty SDValue when the overflow operation's type is not legal.
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Sel = Op.getOperand(0);
  SDValue Other = Op.getOperand(1);
  SDLoc dl(Sel);

  // If the operand is an overflow checking operation, invert the condition
  // code and kill the Not operation. I.e., transform:
  // (xor (overflow_op_bool, 1))
  //   -->
  // (csel 1, 0, invert(cc), overflow_op_bool)
  // ... which later gets transformed to just a cset instruction with an
  // inverted condition code, rather than a cset + eor sequence.
  if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
      return SDValue();

    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
    AArch64CC::CondCode CC;
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
    return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
                       CCVal, Overflow);
  }
  // If neither operand is a SELECT_CC, give up.
  if (Sel.getOpcode() != ISD::SELECT_CC)
    std::swap(Sel, Other);
  if (Sel.getOpcode() != ISD::SELECT_CC)
    return Op;

  // The folding we want to perform is:
  // (xor x, (select_cc a, b, cc, 0, -1) )
  //   -->
  // (csel x, (xor x, -1), cc ...)
  //
  // The latter will get matched to a CSINV instruction.

  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
  SDValue LHS = Sel.getOperand(0);
  SDValue RHS = Sel.getOperand(1);
  SDValue TVal = Sel.getOperand(2);
  SDValue FVal = Sel.getOperand(3);

  // FIXME: This could be generalized to non-integer comparisons.
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
    return Op;

  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);

  // The values aren't constants, this isn't the pattern we're looking for.
  if (!CFVal || !CTVal)
    return Op;

  // We can commute the SELECT_CC by inverting the condition.  This
  // might be needed to make this fit into a CSINV pattern.
  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
    std::swap(TVal, FVal);
    std::swap(CTVal, CFVal);
    CC = ISD::getSetCCInverse(CC, true);
  }

  // If the constants line up, perform the transform!
  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);

    // After this, FVal holds x and TVal holds (xor x, -1).
    FVal = Other;
    TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
                       DAG.getConstant(-1ULL, dl, Other.getValueType()));

    return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
                       CCVal, Cmp);
  }

  return Op;
}
/// Lower ADDC/SUBC/ADDE/SUBE to the flag-setting ADDS/SUBS/ADCS/SBCS nodes.
///
/// ADDE and SUBE forward a third operand in addition to the two forwarded for
/// ADDC and SUBC. An empty SDValue is returned while the result type is not
/// yet legal so that the legalizer expands the node instead.
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
  const EVT ResultVT = Op.getValueType();

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(ResultVT))
    return SDValue();

  SDVTList ResultTys = DAG.getVTList(ResultVT, MVT::i32);

  unsigned TargetOpc;
  bool TakesThirdOperand;
  switch (Op.getOpcode()) {
  case ISD::ADDC:
    TargetOpc = AArch64ISD::ADDS;
    TakesThirdOperand = false;
    break;
  case ISD::SUBC:
    TargetOpc = AArch64ISD::SUBS;
    TakesThirdOperand = false;
    break;
  case ISD::ADDE:
    TargetOpc = AArch64ISD::ADCS;
    TakesThirdOperand = true;
    break;
  case ISD::SUBE:
    TargetOpc = AArch64ISD::SBCS;
    TakesThirdOperand = true;
    break;
  default:
    llvm_unreachable("Invalid code");
  }

  if (TakesThirdOperand)
    return DAG.getNode(TargetOpc, SDLoc(Op), ResultTys, Op.getOperand(0),
                       Op.getOperand(1), Op.getOperand(2));
  return DAG.getNode(TargetOpc, SDLoc(Op), ResultTys, Op.getOperand(0),
                     Op.getOperand(1));
}
/// Lowers the overflow-reporting arithmetic nodes that LowerOperation routes
/// here (SADDO, UADDO, SSUBO, USUBO, SMULO, UMULO).
///
/// The arithmetic itself is built by getAArch64XALUOOp, which also reports the
/// condition code to test. The overflow result is materialised as an i32 that
/// is 1 or 0, and both results are returned through a MERGE_VALUES node.
/// Returns an empty SDValue when the value type is not legal yet.
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
  // Illegal types are left to the legalizer.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
    return SDValue();

  SDLoc DL(Op);
  AArch64CC::CondCode OverflowCC;
  SDValue Result, Flags;
  // Build the node that computes the value and sets the overflow/carry flag.
  std::tie(Result, Flags) = getAArch64XALUOOp(OverflowCC, Op, DAG);

  // True is encoded as 1 and false as 0.
  SDValue One = DAG.getConstant(1, DL, MVT::i32);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);

  // Both the condition and the select operands are inverted so that the
  // select can be matched by a single instruction:
  //   CSINC Wd, WZR, WZR, invert(cond).
  SDValue InvertedCC =
      DAG.getConstant(getInvertedCondCode(OverflowCC), DL, MVT::i32);
  SDValue OverflowBit = DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, Zero, One,
                                    InvertedCC, Flags);

  SDVTList ResultVTs = DAG.getVTList(Op.getValueType(), MVT::i32);
  return DAG.getNode(ISD::MERGE_VALUES, DL, ResultVTs, Result, OverflowBit);
}
// Lowers ISD::PREFETCH to AArch64ISD::PREFETCH with an encoded i32 operand.
//
// Prefetch operands are:
// 1: Address to prefetch
// 2: bool isWrite
// 3: int locality (0 = no locality ... 3 = extreme locality)
// 4: bool isDataCache
//
// The encoded value is laid out as:
//   bit 4    : IsWrite
//   bit 3    : !IsData
//   bits 2:1 : 3 - locality (0 when locality is 0)
//   bit 0    : set when locality is 0 (streaming)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();

  // A locality of zero is encoded through the stream bit.
  bool IsStream = !Locality;
  // When the locality number is set
  if (Locality) {
    // The front-end should have filtered out the out-of-range values
    assert(Locality <= 3 && "Prefetch locality out-of-range");
    // The locality degree is the opposite of the cache speed.
    // Put the number the other way around.
    // The encoding starts at 0 for level 1
    Locality = 3 - Locality;
  }

  // Build the mask value encoding the expected behavior.
  unsigned PrfOp = (IsWrite << 4) |    // Load/Store bit
                   (!IsData << 3) |    // IsDataCache bit
                   (Locality << 1) |   // Cache level bits
                   (unsigned)IsStream; // Stream bit
  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
/// Lowers an FP_EXTEND whose result is f128 (asserted) by handing it to
/// LowerF128Call with the libcall RTLIB::getFPEXT picks for the source and
/// result types.
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
                                              SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");

  const RTLIB::Libcall ExtendLC =
      RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
  return LowerF128Call(Op, DAG, ExtendLC);
}
/// Lowers FP_ROUND. The node is returned unchanged unless its source is f128;
/// an f128 source is rounded through the libcall selected by
/// RTLIB::getFPROUND.
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(0);

  // Only f128 sources need help; everything else is legal as is.
  if (Src.getValueType() != MVT::f128)
    return Op;

  const RTLIB::Libcall RoundLC =
      RTLIB::getFPROUND(Src.getValueType(), Op.getValueType());

  // FP_ROUND carries a second operand saying whether the rounding is known to
  // be precise. It is not an argument of the libcall, so LowerF128Call cannot
  // be used directly; only the source value is passed on.
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, RoundLC, Op.getValueType(), Src, CallOptions,
                     SDLoc(Op))
      .first;
}
/// Lowers a vector FP_TO_SINT / FP_TO_UINT.
///
/// - f16 element inputs are first extended to f32 when the subtarget lacks
///   full fp16 support.
/// - When the result is narrower than the input, the conversion is done at
///   the input's width and the result truncated.
/// - When the result is wider than the input, the input is FP_EXTENDed to the
///   result's element width before converting.
/// - Otherwise the node is returned unchanged.
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
  // Any additional optimization in this function should be recorded
  // in the cost tables.
  EVT InVT = Op.getOperand(0).getValueType();
  EVT VT = Op.getValueType();
  unsigned NumElts = InVT.getVectorNumElements();

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (InVT.getVectorElementType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
    SDLoc dl(Op);
    return DAG.getNode(
        Op.getOpcode(), dl, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
  }

  // Narrowing: convert at the input width, then truncate.
  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
    SDLoc dl(Op);
    SDValue Cv =
        DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
                    Op.getOperand(0));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
  }

  // Widening: extend the floating-point input first, then convert.
  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
    SDLoc dl(Op);
    MVT ExtVT =
        MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
                         VT.getVectorNumElements());
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
    return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
  }

  // Type changing conversions are illegal.
  return Op;
}
/// Lowers FP_TO_SINT / FP_TO_UINT.
///
/// Vector inputs are delegated to LowerVectorFP_TO_INT. Scalar f16 inputs are
/// extended to f32 first when the subtarget lacks full fp16 support. Any
/// other non-f128 input is returned unchanged; f128 inputs become a libcall.
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
                                              SelectionDAG &DAG) const {
  if (Op.getOperand(0).getValueType().isVector())
    return LowerVectorFP_TO_INT(Op, DAG);

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (Op.getOperand(0).getValueType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    SDLoc dl(Op);
    return DAG.getNode(
        Op.getOpcode(), dl, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
  }

  if (Op.getOperand(0).getValueType() != MVT::f128) {
    // It's legal except when f128 is involved
    return Op;
  }

  // Pick the signed or unsigned conversion libcall for the type pair.
  RTLIB::Libcall LC;
  if (Op.getOpcode() == ISD::FP_TO_SINT)
    LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
  else
    LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());

  // All of the node's operands are forwarded as libcall arguments.
  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first;
}
/// Lowers a vector SINT_TO_FP / UINT_TO_FP whose input and result vectors
/// differ in total size.
///
/// - Result narrower than input: convert to a floating-point vector with the
///   input's element width, then FP_ROUND to the result type.
/// - Result wider than input: sign- or zero-extend the integer input to the
///   result's element width (matching the signedness of the conversion), then
///   convert.
/// - Same size: the node is returned unchanged.
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
  // Any additional optimization in this function should be recorded
  // in the cost tables.
  SDLoc DL(Op);
  EVT ResVT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (ResVT.getSizeInBits() < SrcVT.getSizeInBits()) {
    // Convert at the source width first ...
    MVT WideFPVT =
        MVT::getVectorVT(MVT::getFloatingPointVT(SrcVT.getScalarSizeInBits()),
                         SrcVT.getVectorNumElements());
    SDValue WideFP = DAG.getNode(Op.getOpcode(), DL, WideFPVT, Src);
    // ... then round down to the requested type.
    return DAG.getNode(ISD::FP_ROUND, DL, ResVT, WideFP,
                       DAG.getIntPtrConstant(0, DL));
  }

  if (ResVT.getSizeInBits() > SrcVT.getSizeInBits()) {
    // Widen the integer input with the extension matching the conversion.
    unsigned ExtendOpc;
    if (Op.getOpcode() == ISD::SINT_TO_FP)
      ExtendOpc = ISD::SIGN_EXTEND;
    else
      ExtendOpc = ISD::ZERO_EXTEND;
    EVT WideIntVT = ResVT.changeVectorElementTypeToInteger();
    SDValue WideInt = DAG.getNode(ExtendOpc, DL, WideIntVT, Src);
    return DAG.getNode(Op.getOpcode(), DL, ResVT, WideInt);
  }

  return Op;
}
/// Lowers SINT_TO_FP / UINT_TO_FP.
///
/// Vector results are delegated to LowerVectorINT_TO_FP. An f16 result on a
/// subtarget without full fp16 is computed in f32 and rounded. i128 inputs
/// return an empty SDValue. Any other non-f128 result is returned unchanged;
/// f128 results go through LowerF128Call.
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
                                              SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (Op.getValueType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    SDLoc dl(Op);
    return DAG.getNode(
        ISD::FP_ROUND, dl, MVT::f16,
        DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
        DAG.getIntPtrConstant(0, dl));
  }

  // i128 conversions are libcalls.
  if (Op.getOperand(0).getValueType() == MVT::i128)
    return SDValue();

  // Other conversions are legal, unless it's to the completely software-based
  // fp128.
  if (Op.getValueType() != MVT::f128)
    return Op;

  // Pick the signed or unsigned conversion libcall for the type pair.
  RTLIB::Libcall LC;
  if (Op.getOpcode() == ISD::SINT_TO_FP)
    LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
  else
    LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());

  return LowerF128Call(Op, DAG, LC);
}
/// Lowers FSINCOS to a call to the SINCOS_STRET libcall for f64 or f32.
///
/// The call takes the single argument unextended, uses the Fast calling
/// convention, and returns a struct of two values of the argument's type.
/// The first element of the pair returned by LowerCallTo is returned.
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
                                            SelectionDAG &DAG) const {
  // For iOS, we want to call an alternative entry point: __sincos_stret,
  // which returns the values in two S / D registers.
  SDLoc DL(Op);

  SDValue Operand = Op.getOperand(0);
  EVT OperandVT = Operand.getValueType();
  Type *OperandTy = OperandVT.getTypeForEVT(*DAG.getContext());

  // Describe the one and only argument; it is passed without extension.
  ArgListEntry OperandEntry;
  OperandEntry.Node = Operand;
  OperandEntry.Ty = OperandTy;
  OperandEntry.IsSExt = false;
  OperandEntry.IsZExt = false;
  ArgListTy CallArgs;
  CallArgs.push_back(OperandEntry);

  // f64 uses the F64 entry point; every other type uses the F32 one.
  RTLIB::Libcall SinCosLC;
  if (OperandVT == MVT::f64)
    SinCosLC = RTLIB::SINCOS_STRET_F64;
  else
    SinCosLC = RTLIB::SINCOS_STRET_F32;

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(SinCosLC),
                                         getPointerTy(DAG.getDataLayout()));

  // The callee returns both results packed into a two-element struct.
  StructType *ResultTy = StructType::get(OperandTy, OperandTy);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::Fast, ResultTy, Callee, std::move(CallArgs));

  return LowerCallTo(CLI).first;
}
/// Lowers a BITCAST whose result is f16 (the source is asserted to be i16).
///
/// The i16 is any-extended to i32, bitcast to f32, and the f16 is then taken
/// out of that value with EXTRACT_SUBREG on the hsub sub-register index.
/// Returns an empty SDValue for every other result type.
static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
  if (Op.getValueType() != MVT::f16)
    return SDValue();

  assert(Op.getOperand(0).getValueType() == MVT::i16);
  SDLoc DL(Op);

  SDValue WideInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
  SDValue WideFP = DAG.getNode(ISD::BITCAST, DL, MVT::f32, WideInt);
  SDValue SubRegIdx = DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32);
  SDNode *Extract = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                       MVT::f16, WideFP, SubRegIdx);
  return SDValue(Extract, 0);
}
/// Returns the vector type that OrigVT should be extended to so that it is
/// 64 bits wide. Types that are already at least 64 bits are returned as is.
/// Only v2i8, v2i16 and v4i8 are handled among the narrower types; any other
/// type is unreachable.
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
  // Nothing to do for types that are wide enough already.
  if (OrigVT.getSizeInBits() >= 64)
    return OrigVT;

  assert(OrigVT.isSimple() && "Expecting a simple value type");

  switch (OrigVT.getSimpleVT().SimpleTy) {
  // Two-element vectors grow to two 32-bit lanes.
  case MVT::v2i8:
  case MVT::v2i16:
    return MVT::v2i32;
  // Four 8-bit lanes grow to four 16-bit lanes.
  case MVT::v4i8:
    return MVT::v4i16;
  default:
    llvm_unreachable("Unexpected Vector Type");
  }
}
/// Given N of type OrigTy, which the caller had extended to the 128-bit type
/// ExtTy with ExtOpcode, returns a value that is at least 64 bits wide.
///
/// N is returned untouched when OrigTy is already 64 bits or more; otherwise
/// N is re-extended with ExtOpcode to the type chosen by getExtensionTo64Bits.
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
                                                 const EVT &OrigTy,
                                                 const EVT &ExtTy,
                                                 unsigned ExtOpcode) {
  // The extended type is expected to be 128 bits in total.
  assert(ExtTy.is128BitVector() && "Unexpected extension size");

  // Wide enough already: no new extension is needed.
  if (OrigTy.getSizeInBits() >= 64)
    return N;

  // Must extend size to at least 64 bits to be used as an operand for VMULL.
  EVT WidenedVT = getExtensionTo64Bits(OrigTy);
  SDLoc DL(N);
  return DAG.getNode(ExtOpcode, DL, WidenedVT, N);
}
/// Returns true when N is a BUILD_VECTOR made only of constants, each of
/// which fits in half of the vector's element width: as a signed value when
/// isSigned is true, as an unsigned value otherwise.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
                                   bool isSigned) {
  EVT VT = N->getValueType(0);

  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  // The half width is the same for every element, so compute it once.
  const unsigned HalfSize = VT.getScalarSizeInBits() / 2;

  for (const SDValue &Elt : N->op_values()) {
    // Any non-constant element disqualifies the whole vector.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
    if (!C)
      return false;

    if (isSigned) {
      if (!isIntN(HalfSize, C->getSExtValue()))
        return false;
    } else {
      if (!isUIntN(HalfSize, C->getZExtValue()))
        return false;
    }
  }

  return true;
}
/// Returns the narrow operand behind N.
///
/// For a SIGN_EXTEND / ZERO_EXTEND node this is the extension's input,
/// re-extended to 64 bits by addRequiredExtensionForVectorMULL if needed.
/// Otherwise N must be a BUILD_VECTOR of constants (asserted); a new
/// BUILD_VECTOR with half-width elements is built from the same constants.
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
    return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
                                             N->getOperand(0)->getValueType(0),
                                             N->getValueType(0),
                                             N->getOpcode());

  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  unsigned EltSize = VT.getScalarSizeInBits() / 2;
  unsigned NumElts = VT.getVectorNumElements();
  MVT TruncVT = MVT::getIntegerVT(EltSize);
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i != NumElts; ++i) {
    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
    const APInt &CInt = C->getAPIntValue();
    // Element types smaller than 32 bits are not legal, so use i32 elements.
    // The values are implicitly truncated so sext vs. zext doesn't matter.
    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
  }
  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
/// Returns true when N is a SIGN_EXTEND node, or a BUILD_VECTOR accepted by
/// isExtendedBUILD_VECTOR in signed mode.
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
  return N->getOpcode() == ISD::SIGN_EXTEND ||
         isExtendedBUILD_VECTOR(N, DAG, true);
}
/// Returns true when N is a ZERO_EXTEND node, or a BUILD_VECTOR accepted by
/// isExtendedBUILD_VECTOR in unsigned mode.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
  return N->getOpcode() == ISD::ZERO_EXTEND ||
         isExtendedBUILD_VECTOR(N, DAG, false);
}
/// Returns true when N is an ADD or SUB whose two operands each have exactly
/// one use and both satisfy isSignExtended.
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    SDNode *N0 = N->getOperand(0).getNode();
    SDNode *N1 = N->getOperand(1).getNode();
    return N0->hasOneUse() && N1->hasOneUse() &&
           isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
  }
  return false;
}
/// Returns true when N is an ADD or SUB whose two operands each have exactly
/// one use and both satisfy isZeroExtended.
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    SDNode *N0 = N->getOperand(0).getNode();
    SDNode *N1 = N->getOperand(1).getNode();
    return N0->hasOneUse() && N1->hasOneUse() &&
           isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
  }
  return false;
}
/// Lowers FLT_ROUNDS_ by reading the FPCR through the aarch64_get_fpcr
/// intrinsic and converting its two-bit rounding-mode field (bits 23:22) into
/// an i32 FLT_ROUNDS value.
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The ARM rounding mode value to FLT_ROUNDS mapping is
  //   0 -> 1, 1 -> 2, 2 -> 3, 3 -> 0
  // i.e. (mode + 1) mod 4. It is computed as
  //   ((FPCR + (1 << 22)) >> 22) & 3
  // so that the shift and the mask can fold into a bitfield extract.
  SDLoc DL(Op);

  // Read the 64-bit control register and keep its low 32 bits.
  SDValue IntrinsicID =
      DAG.getConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64);
  SDValue FPCR64 =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, IntrinsicID);
  SDValue FPCR32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPCR64);

  // Add one at the position of the rounding-mode field.
  SDValue FieldOne = DAG.getConstant(1U << 22, DL, MVT::i32);
  SDValue Bumped = DAG.getNode(ISD::ADD, DL, MVT::i32, FPCR32, FieldOne);

  // Bring the field down to bit 0 ...
  SDValue FieldShift = DAG.getConstant(22, DL, MVT::i32);
  SDValue Shifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Bumped, FieldShift);

  // ... and discard everything above its two bits.
  SDValue FieldMask = DAG.getConstant(3, DL, MVT::i32);
  return DAG.getNode(ISD::AND, DL, MVT::i32, Shifted, FieldMask);
}
/// Custom-lowers a 128-bit integer vector MUL (asserted).
///
/// - Both operands sign-extended: emit SMULL on the narrow operands.
/// - Both operands zero-extended: emit UMULL on the narrow operands.
/// - One operand extended and the other an ADD/SUB of matching extensions:
///   distribute the multiply over the ADD/SUB, emitting two S/UMULL nodes
///   combined by the original ADD/SUB opcode.
/// - Otherwise v2i64 returns an empty SDValue and every other type returns Op
///   unchanged.
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
  // Multiplications are only custom-lowered for 128-bit vectors so that
  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
  EVT VT = Op.getValueType();
  assert(VT.is128BitVector() && VT.isInteger() &&
         "unexpected type for custom-lowering ISD::MUL");
  SDNode *N0 = Op.getOperand(0).getNode();
  SDNode *N1 = Op.getOperand(1).getNode();
  unsigned NewOpc = 0;
  bool isMLA = false;
  bool isN0SExt = isSignExtended(N0, DAG);
  bool isN1SExt = isSignExtended(N1, DAG);
  if (isN0SExt && isN1SExt)
    NewOpc = AArch64ISD::SMULL;
  else {
    bool isN0ZExt = isZeroExtended(N0, DAG);
    bool isN1ZExt = isZeroExtended(N1, DAG);
    if (isN0ZExt && isN1ZExt)
      NewOpc = AArch64ISD::UMULL;
    else if (isN1SExt || isN1ZExt) {
      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
      if (isN1SExt && isAddSubSExt(N0, DAG)) {
        NewOpc = AArch64ISD::SMULL;
        isMLA = true;
      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
        NewOpc = AArch64ISD::UMULL;
        isMLA = true;
      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
        // Swap so that N0 is always the ADD/SUB and N1 the plain extension.
        std::swap(N0, N1);
        NewOpc = AArch64ISD::UMULL;
        isMLA = true;
      }
    }

    if (!NewOpc) {
      if (VT == MVT::v2i64)
        // Fall through to expand this. It is not legal.
        return SDValue();
      else
        // Other vector multiplications are legal.
        return Op;
    }
  }

  // Legalize to a S/UMULL instruction
  SDLoc DL(Op);
  SDValue Op0;
  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);

  if (!isMLA) {
    Op0 = skipExtensionForVectorMULL(N0, DAG);
    assert(Op0.getValueType().is64BitVector() &&
           Op1.getValueType().is64BitVector() &&
           "unexpected types for extended operands to VMULL");
    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
  }

  // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
  // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
  // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
  EVT Op1VT = Op1.getValueType();
  return DAG.getNode(N0->getOpcode(), DL, VT,
                     DAG.getNode(NewOpc, DL, VT,
                                 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
                     DAG.getNode(NewOpc, DL, VT,
                                 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
/// Custom-lowers a handful of chainless intrinsics, identified by the
/// constant in operand 0. Every intrinsic not listed here returns an empty
/// SDValue.
///
/// - thread_pointer: AArch64ISD::THREAD_POINTER of pointer type.
/// - aarch64_neon_abs: ISD::ABS (through v1i64 for a scalar i64); any type
///   that is neither i64 nor a legal integer vector is a fatal error.
/// - aarch64_neon_{s,u}{max,min}: the matching ISD::SMAX/UMAX/SMIN/UMIN.
/// - localaddress: a copy from the register info's local address register.
/// - eh_recoverfp: returns the incoming FP operand; fatal error when the
///   first argument is not a function.
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                       SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc dl(Op);
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::aarch64_neon_abs: {
    EVT Ty = Op.getValueType();
    if (Ty == MVT::i64) {
      // Scalar i64: round-trip through a one-element vector.
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
                                   Op.getOperand(1));
      Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
      return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
    }
    if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty))
      return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
    report_fatal_error("Unexpected type for AArch64 NEON intrinic");
  }
  case Intrinsic::aarch64_neon_smax:
    return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_umax:
    return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_smin:
    return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_umin:
    return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));

  case Intrinsic::localaddress: {
    const auto &MF = DAG.getMachineFunction();
    const auto *RegInfo = Subtarget->getRegisterInfo();
    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
                              Op.getSimpleValueType());
  }

  case Intrinsic::eh_recoverfp: {
    // FIXME: This needs to be implemented to correctly handle highly aligned
    // stack objects. For now we simply return the incoming FP. Refer D53541
    // for more details.
    SDValue FnOp = Op.getOperand(1);
    SDValue IncomingFPOp = Op.getOperand(2);
    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
    if (!Fn)
      report_fatal_error(
          "llvm.eh.recoverfp must take a function as the first argument");
    return IncomingFPOp;
  }
  }
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
//
// VT must be v4i16 and MemVT must be v4i8 (asserted). The result is a store of
// one i32 to the original base pointer, reusing the original chain and memory
// operand.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
                                        EVT VT, EVT MemVT,
                                        SelectionDAG &DAG) {
  assert(VT.isVector() && "VT should be a vector type");
  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);

  SDValue Value = ST->getValue();

  // It first extend the promoted v4i16 to v8i16, truncate to v8i8, and extract
  // the word lane which represent the v4i8 subvector. It optimizes the store
  // to:
  //
  //   xtn  v0.8b, v0.8h
  //   str  s0, [x0]

  // Pad the upper half with undef to form a v8i16.
  SDValue Undef = DAG.getUNDEF(MVT::i16);
  SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
                                        {Undef, Undef, Undef, Undef});

  SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
                                 Value, UndefVec);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);

  // View the eight bytes as two words and keep word 0.
  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
  SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
                                     Trunc, DAG.getConstant(0, DL, MVT::i64));

  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
                      ST->getBasePtr(), ST->getMemOperand());
}
// Custom lowering for any store, vector or scalar and/or default or with
// a truncate operations. Currently only custom lower truncate operation
// from vector v4i16 to v4i8.
//
// The stored value must be a vector (asserted). An under-aligned store that
// the target does not allow as a misaligned access is scalarized. A
// truncating store goes through LowerTruncateVectorStore. Every other store
// returns an empty SDValue.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  assert (StoreNode && "Can only custom lower store nodes");

  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();

  assert (VT.isVector() && "Can only custom lower vector store types");

  unsigned AS = StoreNode->getAddressSpace();
  unsigned Align = StoreNode->getAlignment();
  // Alignment below the store size needs the target's explicit approval.
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(
          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
    return scalarizeVectorStore(StoreNode, DAG);
  }

  if (StoreNode->isTruncatingStore()) {
    return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
  }

  return SDValue();
}
/// Entry point for custom lowering: dispatches Op to the lowering routine
/// registered for its opcode. Reaching this function with an opcode that has
/// no entry below is a programming error (llvm_unreachable).
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  LLVM_DEBUG(dbgs() << "Custom lowering: "; Op.dump());

  switch (Op.getOpcode()) {
  case ISD::BITCAST:
    return LowerBITCAST(Op, DAG);

  // Addresses.
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);

  // Comparisons, selects and branches.
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::SELECT:
    return LowerSELECT(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);

  // Jump tables, constant pools and block addresses.
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);

  // Variable arguments.
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return LowerVACOPY(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);

  // Arithmetic with carry or overflow results.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE:
    return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:
    return LowerXALUO(Op, DAG);

  // Floating-point arithmetic routed through the f128 libcalls.
  case ISD::FADD:
    return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
  case ISD::FSUB:
    return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
  case ISD::FMUL:
    return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
  case ISD::FDIV:
    return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
  case ISD::FP_ROUND:
    return LowerFP_ROUND(Op, DAG);
  case ISD::FP_EXTEND:
    return LowerFP_EXTEND(Op, DAG);

  // Frame and return address queries.
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::SPONENTRY:
    return LowerSPONENTRY(Op, DAG);
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::ADDROFRETURNADDR:
    return LowerADDROFRETURNADDR(Op, DAG);

  // Vector construction and element access.
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:
    return LowerEXTRACT_SUBVECTOR(Op, DAG);

  // Shifts.
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SHL:
    return LowerVectorSRA_SRL_SHL(Op, DAG);
  case ISD::SHL_PARTS:
    return LowerShiftLeftParts(Op, DAG);
  case ISD::SRL_PARTS:
  case ISD::SRA_PARTS:
    return LowerShiftRightParts(Op, DAG);

  // Bit manipulation and logic.
  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);
  case ISD::FCOPYSIGN:
    return LowerFCOPYSIGN(Op, DAG);
  case ISD::OR:
    return LowerVectorOR(Op, DAG);
  case ISD::XOR:
    return LowerXOR(Op, DAG);

  case ISD::PREFETCH:
    return LowerPREFETCH(Op, DAG);

  // Integer <-> floating-point conversions.
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG);

  case ISD::FSINCOS:
    return LowerFSINCOS(Op, DAG);
  case ISD::FLT_ROUNDS_:
    return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::MUL:
    return LowerMUL(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::STORE:
    return LowerSTORE(Op, DAG);

  // Vector reductions.
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
    return LowerVECREDUCE(Op, DAG);

  // Atomics.
  case ISD::ATOMIC_LOAD_SUB:
    return LowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return LowerATOMIC_LOAD_AND(Op, DAG);

  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);

  default:
    llvm_unreachable("unimplemented operand");
    return SDValue();
  }
}
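//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//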
/// Selects the correct CCAssignFn for a given CallingConvention value.
///
/// For the C-like conventions (C, Fast, PreserveMost, CXX_FAST_TLS, Swift)
/// the choice also depends on the subtarget: variadic calls on Windows use
/// the Win64 vararg convention, non-Darwin targets use AAPCS, and Darwin uses
/// DarwinPCS with separate vararg variants (one of them for ILP32).
/// Conventions not listed below are a fatal error.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                     bool IsVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention.");
  case CallingConv::WebKit_JS:
    return CC_AArch64_WebKit_JS;
  case CallingConv::GHC:
    return CC_AArch64_GHC;
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::PreserveMost:
  case CallingConv::CXX_FAST_TLS:
  case CallingConv::Swift:
    if (Subtarget->isTargetWindows() && IsVarArg)
      return CC_AArch64_Win64_VarArg;
    if (!Subtarget->isTargetDarwin())
      return CC_AArch64_AAPCS;
    if (!IsVarArg)
      return CC_AArch64_DarwinPCS;
    return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
                                      : CC_AArch64_DarwinPCS_VarArg;
  case CallingConv::Win64:
    return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
  case CallingConv::AArch64_VectorCall:
    return CC_AArch64_AAPCS;
  }
}
/// Selects the CCAssignFn used for return values: the WebKit_JS convention
/// has its own, and every other convention uses the AAPCS one.
CCAssignFn *
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return RetCC_AArch64_WebKit_JS;
  return RetCC_AArch64_AAPCS;
}
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsWin64 = Subtarget>isCallingConvWin64(MF.getFunction().getCallingConv());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex()  CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg>getType(),
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1  ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
assert(ArgLocs.size() == Ins.size());
SmallVector<SDValue, 16> ArgValues;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// noncompliant manner for larger structs.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
// FIXME: This works on bigendian for composite byvals, which are the common
// case. It should also work for fundamental types too.
unsigned FrameIdx =
MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
}
SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
else if (RegVT == MVT::f16)
RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
else if (RegVT == MVT::f64  RegVT.is64BitVector())
RC = &AArch64::FPR64RegClass;
else if (RegVT == MVT::f128  RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
else if (RegVT.isScalableVector() &&
RegVT.getVectorElementType() == MVT::i1)
RC = &AArch64::PPRRegClass;
else if (RegVT.isScalableVector())
RC = &AArch64::ZPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8, 16 or 32bit value, it is really passed promoted
// to 64 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
llvm_unreachable("Spilling of SVE vectors not yet implemented");
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
break;
case CCValAssign::AExtUpper:
ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
DAG.getConstant(32, DL, RegVT));
ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
uint32_t BEAlign = 0;
if (!Subtarget>isLittleEndian() && ArgSize < 8 &&
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8  ArgSize;
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
llvm_unreachable("Spilling of SVE vectors not yet implemented");
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue = DAG.getExtLoad(
ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MemVT);