diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 3293c29a05fb..1d80e25c35a8 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -1,199 +1,199 @@
//=== ValueTypes.td  ValueType definitions * tablegen *===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//======//
//
// Value types - These values correspond to the register types defined in the
// MachineValueType.h file. If you update anything here, you must update it
// there as well!
//
//======//
// Base record for every value type defined in this file.
//   size  - copied into the Size field; for the fixed-width types defined
//           below this is the width of the type in bits.
//   value - copied into the Value field; the numeric ID of the type. The
//           file header says these numbers must be kept in sync with the
//           C++ header.
class ValueType<int size, int value> {
string Namespace = "MVT";
int Size = size;
int Value = value;
}
// Scalar types: the "Other" placeholder, then integers (IDs 2-7) and
// floating-point types (IDs 8-13). The first argument is the Size, the
// second the numeric ID.
def OtherVT: ValueType<0 , 1>; // "Other" value
def i1 : ValueType<1 , 2>; // One bit boolean value
def i8 : ValueType<8 , 3>; // 8-bit integer value
def i16 : ValueType<16 , 4>; // 16-bit integer value
def i32 : ValueType<32 , 5>; // 32-bit integer value
def i64 : ValueType<64 , 6>; // 64-bit integer value
def i128 : ValueType<128, 7>; // 128-bit integer value
def f16 : ValueType<16 , 8>; // 16-bit floating point value
def f32 : ValueType<32 , 9>; // 32-bit floating point value
def f64 : ValueType<64 , 10>; // 64-bit floating point value
def f80 : ValueType<80 , 11>; // 80-bit floating point value
def f128 : ValueType<128, 12>; // 128-bit floating point value
def ppcf128: ValueType<128, 13>; // PPC 128-bit floating point value
// Fixed-length integer vector types (IDs 14-62). Size is the total width in
// bits, i.e. element count x element width.
def v1i1 : ValueType<1 , 14>; // 1 x i1 vector value
def v2i1 : ValueType<2 , 15>; // 2 x i1 vector value
def v4i1 : ValueType<4 , 16>; // 4 x i1 vector value
def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value
def v16i1 : ValueType<16, 18>; // 16 x i1 vector value
def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value
def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value
def v128i1 : ValueType<128, 21>; // 128 x i1 vector value
def v512i1 : ValueType<512, 22>; // 512 x i1 vector value
def v1024i1: ValueType<1024,23>; //1024 x i1 vector value
def v1i8 : ValueType<8, 24>; // 1 x i8 vector value
def v2i8 : ValueType<16 , 25>; // 2 x i8 vector value
def v4i8 : ValueType<32 , 26>; // 4 x i8 vector value
def v8i8 : ValueType<64 , 27>; // 8 x i8 vector value
def v16i8 : ValueType<128, 28>; // 16 x i8 vector value
def v32i8 : ValueType<256, 29>; // 32 x i8 vector value
def v64i8 : ValueType<512, 30>; // 64 x i8 vector value
def v128i8 : ValueType<1024,31>; //128 x i8 vector value
def v256i8 : ValueType<2048,32>; //256 x i8 vector value
def v1i16 : ValueType<16 , 33>; // 1 x i16 vector value
def v2i16 : ValueType<32 , 34>; // 2 x i16 vector value
def v3i16 : ValueType<48 , 35>; // 3 x i16 vector value
def v4i16 : ValueType<64 , 36>; // 4 x i16 vector value
def v8i16 : ValueType<128, 37>; // 8 x i16 vector value
def v16i16 : ValueType<256, 38>; // 16 x i16 vector value
def v32i16 : ValueType<512, 39>; // 32 x i16 vector value
def v64i16 : ValueType<1024,40>; // 64 x i16 vector value
def v128i16: ValueType<2048,41>; //128 x i16 vector value
def v1i32 : ValueType<32 , 42>; // 1 x i32 vector value
def v2i32 : ValueType<64 , 43>; // 2 x i32 vector value
def v3i32 : ValueType<96 , 44>; // 3 x i32 vector value
def v4i32 : ValueType<128, 45>; // 4 x i32 vector value
def v5i32 : ValueType<160, 46>; // 5 x i32 vector value
def v8i32 : ValueType<256, 47>; // 8 x i32 vector value
def v16i32 : ValueType<512, 48>; // 16 x i32 vector value
def v32i32 : ValueType<1024,49>; // 32 x i32 vector value
def v64i32 : ValueType<2048,50>; // 64 x i32 vector value
def v128i32 : ValueType<4096,51>; // 128 x i32 vector value
// 256 x 32 bits = 8192 (the Size here used to read 8182).
def v256i32 : ValueType<8192,52>; // 256 x i32 vector value
def v512i32 : ValueType<16384,53>; // 512 x i32 vector value
def v1024i32 : ValueType<32768,54>; // 1024 x i32 vector value
def v2048i32 : ValueType<65536,55>; // 2048 x i32 vector value
def v1i64 : ValueType<64 , 56>; // 1 x i64 vector value
def v2i64 : ValueType<128, 57>; // 2 x i64 vector value
def v4i64 : ValueType<256, 58>; // 4 x i64 vector value
def v8i64 : ValueType<512, 59>; // 8 x i64 vector value
def v16i64 : ValueType<1024,60>; // 16 x i64 vector value
def v32i64 : ValueType<2048,61>; // 32 x i64 vector value
def v1i128 : ValueType<128, 62>; // 1 x i128 vector value
// Scalable ("n x") integer vector types with the pre-patch numbering 63-92;
// the same defs appear again further down on '+' lines, renumbered 87-116.
// Size is the width of one n-multiple: listed element count x element width.
def nxv1i1 : ValueType<1, 63>; // n x 1 x i1 vector value
def nxv2i1 : ValueType<2, 64>; // n x 2 x i1 vector value
def nxv4i1 : ValueType<4, 65>; // n x 4 x i1 vector value
def nxv8i1 : ValueType<8, 66>; // n x 8 x i1 vector value
def nxv16i1 : ValueType<16, 67>; // n x 16 x i1 vector value
def nxv32i1 : ValueType<32, 68>; // n x 32 x i1 vector value

def nxv1i8 : ValueType<8, 69>; // n x 1 x i8 vector value
def nxv2i8 : ValueType<16, 70>; // n x 2 x i8 vector value
def nxv4i8 : ValueType<32, 71>; // n x 4 x i8 vector value
def nxv8i8 : ValueType<64, 72>; // n x 8 x i8 vector value
def nxv16i8 : ValueType<128, 73>; // n x 16 x i8 vector value
def nxv32i8 : ValueType<256, 74>; // n x 32 x i8 vector value

def nxv1i16 : ValueType<16, 75>; // n x 1 x i16 vector value
def nxv2i16 : ValueType<32, 76>; // n x 2 x i16 vector value
def nxv4i16 : ValueType<64, 77>; // n x 4 x i16 vector value
def nxv8i16 : ValueType<128, 78>; // n x 8 x i16 vector value
def nxv16i16: ValueType<256, 79>; // n x 16 x i16 vector value
def nxv32i16: ValueType<512, 80>; // n x 32 x i16 vector value

def nxv1i32 : ValueType<32, 81>; // n x 1 x i32 vector value
def nxv2i32 : ValueType<64, 82>; // n x 2 x i32 vector value
def nxv4i32 : ValueType<128, 83>; // n x 4 x i32 vector value
def nxv8i32 : ValueType<256, 84>; // n x 8 x i32 vector value
def nxv16i32: ValueType<512, 85>; // n x 16 x i32 vector value
def nxv32i32: ValueType<1024,86>; // n x 32 x i32 vector value

def nxv1i64 : ValueType<64, 87>; // n x 1 x i64 vector value
def nxv2i64 : ValueType<128, 88>; // n x 2 x i64 vector value
def nxv4i64 : ValueType<256, 89>; // n x 4 x i64 vector value
def nxv8i64 : ValueType<512, 90>; // n x 8 x i64 vector value
def nxv16i64: ValueType<1024,91>; // n x 16 x i64 vector value
def nxv32i64: ValueType<2048,92>; // n x 32 x i64 vector value

// Fixed-length floating-point vector types with the pre-patch numbering
// 93-116; the same defs appear again below on '+' lines, renumbered 63-86.
// Size is the total width in bits (element count x element width).
def v2f16 : ValueType<32 , 93>; // 2 x f16 vector value
def v3f16 : ValueType<48 , 94>; // 3 x f16 vector value
def v4f16 : ValueType<64 , 95>; // 4 x f16 vector value
def v8f16 : ValueType<128, 96>; // 8 x f16 vector value
def v16f16 : ValueType<256, 97>; // 16 x f16 vector value
def v32f16 : ValueType<512, 98>; // 32 x f16 vector value
def v1f32 : ValueType<32 , 99>; // 1 x f32 vector value
def v2f32 : ValueType<64 , 100>; // 2 x f32 vector value
def v3f32 : ValueType<96 , 101>; // 3 x f32 vector value
def v4f32 : ValueType<128, 102>; // 4 x f32 vector value
def v5f32 : ValueType<160, 103>; // 5 x f32 vector value
def v8f32 : ValueType<256, 104>; // 8 x f32 vector value
def v16f32 : ValueType<512, 105>; // 16 x f32 vector value
def v32f32 : ValueType<1024, 106>; // 32 x f32 vector value
def v64f32 : ValueType<2048, 107>; // 64 x f32 vector value
def v128f32 : ValueType<4096, 108>; // 128 x f32 vector value
// 256 x 32 bits = 8192 (the Size here used to read 8182).
def v256f32 : ValueType<8192, 109>; // 256 x f32 vector value
def v512f32 : ValueType<16384, 110>; // 512 x f32 vector value
def v1024f32 : ValueType<32768, 111>; // 1024 x f32 vector value
def v2048f32 : ValueType<65536, 112>; // 2048 x f32 vector value
def v1f64 : ValueType<64, 113>; // 1 x f64 vector value
def v2f64 : ValueType<128, 114>; // 2 x f64 vector value
def v4f64 : ValueType<256, 115>; // 4 x f64 vector value
def v8f64 : ValueType<512, 116>; // 8 x f64 vector value
// Fixed-length floating-point vector types, renumbered 63-86 by this patch
// so that they directly follow the fixed-length integer vectors.
// Size is the total width in bits (element count x element width).
+def v2f16 : ValueType<32 , 63>; // 2 x f16 vector value
+def v3f16 : ValueType<48 , 64>; // 3 x f16 vector value
+def v4f16 : ValueType<64 , 65>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 66>; // 8 x f16 vector value
+def v16f16 : ValueType<256, 67>; // 16 x f16 vector value
+def v32f16 : ValueType<512, 68>; // 32 x f16 vector value
+def v1f32 : ValueType<32 , 69>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 70>; // 2 x f32 vector value
+def v3f32 : ValueType<96 , 71>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 72>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 73>; // 5 x f32 vector value
+def v8f32 : ValueType<256, 74>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 75>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 76>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 77>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 78>; // 128 x f32 vector value
+def v256f32 : ValueType<8192, 79>; // 256 x f32 vector value (256 x 32 = 8192 bits)
+def v512f32 : ValueType<16384, 80>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 81>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 82>; // 2048 x f32 vector value
+def v1f64 : ValueType<64, 83>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 84>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 85>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 86>; // 8 x f64 vector value
+
// Scalable ("n x") integer vector types, renumbered 87-116 by this patch so
// that all scalable vectors (integer, then floating point) are contiguous.
// Size is the width of one n-multiple: listed element count x element width.
+def nxv1i1 : ValueType<1, 87>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 88>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 89>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 90>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 91>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 92>; // n x 32 x i1 vector value
+
+def nxv1i8 : ValueType<8, 93>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 94>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 95>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 96>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 97>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 98>; // n x 32 x i8 vector value
+
+def nxv1i16 : ValueType<16, 99>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 100>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 101>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 102>; // n x 8 x i16 vector value
+def nxv16i16: ValueType<256, 103>; // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 104>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32, 105>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 106>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 107>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 108>; // n x 8 x i32 vector value
+def nxv16i32: ValueType<512, 109>; // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,110>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64, 111>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 112>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 113>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 114>; // n x 8 x i64 vector value
+def nxv16i64: ValueType<1024,115>; // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,116>; // n x 32 x i64 vector value
// Scalable ("n x") floating-point vector types (IDs 117-128); their
// numbering is not changed by this patch.
// Size is the width of one n-multiple: listed element count x element width.
def nxv2f16 : ValueType<32 , 117>; // n x 2 x f16 vector value
def nxv4f16 : ValueType<64 , 118>; // n x 4 x f16 vector value
def nxv8f16 : ValueType<128, 119>; // n x 8 x f16 vector value
def nxv1f32 : ValueType<32 , 120>; // n x 1 x f32 vector value
def nxv2f32 : ValueType<64 , 121>; // n x 2 x f32 vector value
def nxv4f32 : ValueType<128, 122>; // n x 4 x f32 vector value
def nxv8f32 : ValueType<256, 123>; // n x 8 x f32 vector value
def nxv16f32 : ValueType<512, 124>; // n x 16 x f32 vector value
def nxv1f64 : ValueType<64, 125>; // n x 1 x f64 vector value
def nxv2f64 : ValueType<128, 126>; // n x 2 x f64 vector value
def nxv4f64 : ValueType<256, 127>; // n x 4 x f64 vector value
def nxv8f64 : ValueType<512, 128>; // n x 8 x f64 vector value
// Special-purpose types (IDs 129-133), followed by the token/metadata types
// and the pseudo types that occupy the top of the 8-bit ID range (248-255).
def x86mmx : ValueType<64 , 129>; // X86 MMX value
def FlagVT : ValueType<0 , 130>; // Pre-RA sched glue (ID 130 is named Glue in the MVT enum)
def isVoid : ValueType<0 , 131>; // Produces no value
def untyped: ValueType<8 , 132>; // Produces an untyped value
def exnref: ValueType<0, 133>; // WebAssembly's exnref type
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata
// Pseudo valuetype mapped to the current pointer size to any address space.
// Should only be used in TableGen.
def iPTRAny : ValueType<0, 250>;
// Pseudo valuetype to represent "vector of any size"
def vAny : ValueType<0 , 251>;
// Pseudo valuetype to represent "float of any format"
def fAny : ValueType<0 , 252>;
// Pseudo valuetype to represent "integer of any bit width"
def iAny : ValueType<0 , 253>;
// Pseudo valuetype mapped to the current pointer size.
def iPTR : ValueType<0 , 254>;
// Pseudo valuetype to represent "any type of any size".
def Any : ValueType<0 , 255>;
/// This class is for targets that want to use pointer types in patterns
/// with the GlobalISelEmitter. Targets must define their own pointer types
/// derived from this class. The scalar argument should be an
/// integer type with the same bit size as the pointer.
/// e.g. def p0 : PtrValueType <i64, 0>;
///
/// The record inherits Size and Value from `scalar` and additionally stores
/// the address space number in AddrSpace.
class PtrValueType <ValueType scalar, int addrspace> :
ValueType<scalar.Size, scalar.Value> {
int AddrSpace = addrspace;
}
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index f15475b118bd..ae3079ceb84f 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -1,1133 +1,1159 @@
//=== Support/MachineValueType.h  MachineLevel types * C++ *===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//======//
//
// This file defines the set of machine-level target independent types which
// legal values in the code generator use.
//
//======//
#ifndef LLVM_SUPPORT_MACHINEVALUETYPE_H
#define LLVM_SUPPORT_MACHINEVALUETYPE_H
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ScalableSize.h"
#include <cassert>
namespace llvm {
class Type;
/// Machine Value Type. Every type that is supported natively by some
/// processor targeted by LLVM occurs here. This means that any legal value
/// type can be represented by an MVT.
class MVT {
public:
/// Numeric identifiers for every simple value type, stored in one byte.
///
/// Concrete types are numbered upward from FIRST_VALUETYPE; the pseudo types
/// at the end take the values 248-255. The FIRST_*/LAST_* enumerators are
/// aliases marking contiguous ranges, which the is*() predicates of this
/// class test with simple range comparisons.
///
/// This region is still in patch form: lines starting with '+' carry the new
/// numbering (fixed-length vectors first, then scalable vectors), while the
/// lines starting with a single space carry the numbering they replace.
enum SimpleValueType : uint8_t {
// Simple value types that aren't explicitly part of this enumeration
// are considered extended value types.
INVALID_SIMPLE_VALUE_TYPE = 0,
// If you change this numbering, you must change the values in
// ValueTypes.td as well!
Other = 1, // This is a non-standard value
i1 = 2, // This is a 1 bit integer value
i8 = 3, // This is an 8 bit integer value
i16 = 4, // This is a 16 bit integer value
i32 = 5, // This is a 32 bit integer value
i64 = 6, // This is a 64 bit integer value
i128 = 7, // This is a 128 bit integer value
FIRST_INTEGER_VALUETYPE = i1,
LAST_INTEGER_VALUETYPE = i128,
f16 = 8, // This is a 16 bit floating point value
f32 = 9, // This is a 32 bit floating point value
f64 = 10, // This is a 64 bit floating point value
f80 = 11, // This is a 80 bit floating point value
f128 = 12, // This is a 128 bit floating point value
ppcf128 = 13, // This is a PPC 128-bit floating point value
FIRST_FP_VALUETYPE = f16,
LAST_FP_VALUETYPE = ppcf128,
v1i1 = 14, // 1 x i1
v2i1 = 15, // 2 x i1
v4i1 = 16, // 4 x i1
v8i1 = 17, // 8 x i1
v16i1 = 18, // 16 x i1
v32i1 = 19, // 32 x i1
v64i1 = 20, // 64 x i1
v128i1 = 21, // 128 x i1
v512i1 = 22, // 512 x i1
v1024i1 = 23, // 1024 x i1
v1i8 = 24, // 1 x i8
v2i8 = 25, // 2 x i8
v4i8 = 26, // 4 x i8
v8i8 = 27, // 8 x i8
v16i8 = 28, // 16 x i8
v32i8 = 29, // 32 x i8
v64i8 = 30, // 64 x i8
v128i8 = 31, //128 x i8
v256i8 = 32, //256 x i8
v1i16 = 33, // 1 x i16
v2i16 = 34, // 2 x i16
v3i16 = 35, // 3 x i16
v4i16 = 36, // 4 x i16
v8i16 = 37, // 8 x i16
v16i16 = 38, // 16 x i16
v32i16 = 39, // 32 x i16
v64i16 = 40, // 64 x i16
v128i16 = 41, //128 x i16
v1i32 = 42, // 1 x i32
v2i32 = 43, // 2 x i32
v3i32 = 44, // 3 x i32
v4i32 = 45, // 4 x i32
v5i32 = 46, // 5 x i32
v8i32 = 47, // 8 x i32
v16i32 = 48, // 16 x i32
v32i32 = 49, // 32 x i32
v64i32 = 50, // 64 x i32
v128i32 = 51, // 128 x i32
v256i32 = 52, // 256 x i32
v512i32 = 53, // 512 x i32
v1024i32 = 54, // 1024 x i32
v2048i32 = 55, // 2048 x i32
v1i64 = 56, // 1 x i64
v2i64 = 57, // 2 x i64
v4i64 = 58, // 4 x i64
v8i64 = 59, // 8 x i64
v16i64 = 60, // 16 x i64
v32i64 = 61, // 32 x i64
v1i128 = 62, // 1 x i128
 // Scalable integer types
 nxv1i1 = 63, // n x 1 x i1
 nxv2i1 = 64, // n x 2 x i1
 nxv4i1 = 65, // n x 4 x i1
 nxv8i1 = 66, // n x 8 x i1
 nxv16i1 = 67, // n x 16 x i1
 nxv32i1 = 68, // n x 32 x i1

 nxv1i8 = 69, // n x 1 x i8
 nxv2i8 = 70, // n x 2 x i8
 nxv4i8 = 71, // n x 4 x i8
 nxv8i8 = 72, // n x 8 x i8
 nxv16i8 = 73, // n x 16 x i8
 nxv32i8 = 74, // n x 32 x i8

 nxv1i16 = 75, // n x 1 x i16
 nxv2i16 = 76, // n x 2 x i16
 nxv4i16 = 77, // n x 4 x i16
 nxv8i16 = 78, // n x 8 x i16
 nxv16i16 = 79, // n x 16 x i16
 nxv32i16 = 80, // n x 32 x i16

 nxv1i32 = 81, // n x 1 x i32
 nxv2i32 = 82, // n x 2 x i32
 nxv4i32 = 83, // n x 4 x i32
 nxv8i32 = 84, // n x 8 x i32
 nxv16i32 = 85, // n x 16 x i32
 nxv32i32 = 86, // n x 32 x i32

 nxv1i64 = 87, // n x 1 x i64
 nxv2i64 = 88, // n x 2 x i64
 nxv4i64 = 89, // n x 4 x i64
 nxv8i64 = 90, // n x 8 x i64
 nxv16i64 = 91, // n x 16 x i64
 nxv32i64 = 92, // n x 32 x i64

 FIRST_INTEGER_VECTOR_VALUETYPE = v1i1,
 LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64,

 FIRST_INTEGER_SCALABLE_VALUETYPE = nxv1i1,
 LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64,

 v2f16 = 93, // 2 x f16
 v3f16 = 94, // 3 x f16
 v4f16 = 95, // 4 x f16
 v8f16 = 96, // 8 x f16
 v16f16 = 97, // 16 x f16
 v32f16 = 98, // 32 x f16
 v1f32 = 99, // 1 x f32
 v2f32 = 100, // 2 x f32
 v3f32 = 101, // 3 x f32
 v4f32 = 102, // 4 x f32
 v5f32 = 103, // 5 x f32
 v8f32 = 104, // 8 x f32
 v16f32 = 105, // 16 x f32
 v32f32 = 106, // 32 x f32
 v64f32 = 107, // 64 x f32
 v128f32 = 108, // 128 x f32
 v256f32 = 109, // 256 x f32
 v512f32 = 110, // 512 x f32
 v1024f32 = 111, // 1024 x f32
 v2048f32 = 112, // 2048 x f32
 v1f64 = 113, // 1 x f64
 v2f64 = 114, // 2 x f64
 v4f64 = 115, // 4 x f64
 v8f64 = 116, // 8 x f64
+ FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
+ LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
+
+ v2f16 = 63, // 2 x f16
+ v3f16 = 64, // 3 x f16
+ v4f16 = 65, // 4 x f16
+ v8f16 = 66, // 8 x f16
+ v16f16 = 67, // 16 x f16
+ v32f16 = 68, // 32 x f16
+ v1f32 = 69, // 1 x f32
+ v2f32 = 70, // 2 x f32
+ v3f32 = 71, // 3 x f32
+ v4f32 = 72, // 4 x f32
+ v5f32 = 73, // 5 x f32
+ v8f32 = 74, // 8 x f32
+ v16f32 = 75, // 16 x f32
+ v32f32 = 76, // 32 x f32
+ v64f32 = 77, // 64 x f32
+ v128f32 = 78, // 128 x f32
+ v256f32 = 79, // 256 x f32
+ v512f32 = 80, // 512 x f32
+ v1024f32 = 81, // 1024 x f32
+ v2048f32 = 82, // 2048 x f32
+ v1f64 = 83, // 1 x f64
+ v2f64 = 84, // 2 x f64
+ v4f64 = 85, // 4 x f64
+ v8f64 = 86, // 8 x f64
+
+ FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16,
+ LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v8f64,
+
+ FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
+ LAST_FIXEDLEN_VECTOR_VALUETYPE = v8f64,
+
+ nxv1i1 = 87, // n x 1 x i1
+ nxv2i1 = 88, // n x 2 x i1
+ nxv4i1 = 89, // n x 4 x i1
+ nxv8i1 = 90, // n x 8 x i1
+ nxv16i1 = 91, // n x 16 x i1
+ nxv32i1 = 92, // n x 32 x i1
+
+ nxv1i8 = 93, // n x 1 x i8
+ nxv2i8 = 94, // n x 2 x i8
+ nxv4i8 = 95, // n x 4 x i8
+ nxv8i8 = 96, // n x 8 x i8
+ nxv16i8 = 97, // n x 16 x i8
+ nxv32i8 = 98, // n x 32 x i8
+
+ nxv1i16 = 99, // n x 1 x i16
+ nxv2i16 = 100, // n x 2 x i16
+ nxv4i16 = 101, // n x 4 x i16
+ nxv8i16 = 102, // n x 8 x i16
+ nxv16i16 = 103, // n x 16 x i16
+ nxv32i16 = 104, // n x 32 x i16
+
+ nxv1i32 = 105, // n x 1 x i32
+ nxv2i32 = 106, // n x 2 x i32
+ nxv4i32 = 107, // n x 4 x i32
+ nxv8i32 = 108, // n x 8 x i32
+ nxv16i32 = 109, // n x 16 x i32
+ nxv32i32 = 110, // n x 32 x i32
+
+ nxv1i64 = 111, // n x 1 x i64
+ nxv2i64 = 112, // n x 2 x i64
+ nxv4i64 = 113, // n x 4 x i64
+ nxv8i64 = 114, // n x 8 x i64
+ nxv16i64 = 115, // n x 16 x i64
+ nxv32i64 = 116, // n x 32 x i64
+
+ FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
+ LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
nxv2f16 = 117, // n x 2 x f16
nxv4f16 = 118, // n x 4 x f16
nxv8f16 = 119, // n x 8 x f16
nxv1f32 = 120, // n x 1 x f32
nxv2f32 = 121, // n x 2 x f32
nxv4f32 = 122, // n x 4 x f32
nxv8f32 = 123, // n x 8 x f32
nxv16f32 = 124, // n x 16 x f32
nxv1f64 = 125, // n x 1 x f64
nxv2f64 = 126, // n x 2 x f64
nxv4f64 = 127, // n x 4 x f64
nxv8f64 = 128, // n x 8 x f64
 FIRST_FP_VECTOR_VALUETYPE = v2f16,
 LAST_FP_VECTOR_VALUETYPE = nxv8f64,
+ FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv2f16,
+ LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
 FIRST_FP_SCALABLE_VALUETYPE = nxv2f16,
 LAST_FP_SCALABLE_VALUETYPE = nxv8f64,
+ FIRST_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
+ LAST_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
x86mmx = 129, // This is an X86 MMX value
Glue = 130, // This glues nodes together during pre-RA sched
isVoid = 131, // This has no value
Untyped = 132, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
exnref = 133, // WebAssembly's exnref type
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
LAST_VALUETYPE = 134, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
// This value must be a multiple of 32.
MAX_ALLOWED_VALUETYPE = 160,
// A value of type llvm::TokenTy
token = 248,
// This is MDNode or MDString.
Metadata = 249,
// An int value the size of the pointer of the current
// target to any address space. This must only be used internal to
// tblgen. Other than for overloading, we treat iPTRAny the same as iPTR.
iPTRAny = 250,
// A vector with any length and element size. This is used
// for intrinsics that have overloadings based on vector types.
// This is only for tblgen's consumption!
vAny = 251,
// Any floating-point or vector floating-point value. This is used
// for intrinsics that have overloadings based on floating-point types.
// This is only for tblgen's consumption!
fAny = 252,
// An integer or vector integer value of any bit width. This is
// used for intrinsics that have overloadings based on integer bit widths.
// This is only for tblgen's consumption!
iAny = 253,
// An int value the size of the pointer of the current
// target. This should only be used internal to tblgen!
iPTR = 254,
// Any type. This is used for intrinsics that have overloadings.
// This is only for tblgen's consumption!
Any = 255
};
/// The wrapped enumerator. A default-constructed MVT holds
/// INVALID_SIMPLE_VALUE_TYPE.
SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE;
/// Default constructor; SimpleTy keeps its in-class initializer.
constexpr MVT() = default;
/// Non-explicit constructor, so a SimpleValueType enumerator converts
/// implicitly to an MVT.
constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {}
/// Relational operators. Two MVTs order and compare exactly as the numeric
/// values of their SimpleTy enumerators do.
bool operator>(const MVT &S) const {
  return S.SimpleTy < SimpleTy;
}
bool operator<(const MVT &S) const {
  return S.SimpleTy > SimpleTy;
}
bool operator==(const MVT &S) const {
  return S.SimpleTy == SimpleTy;
}
bool operator!=(const MVT &S) const {
  return !(SimpleTy == S.SimpleTy);
}
bool operator>=(const MVT &S) const {
  return !(SimpleTy < S.SimpleTy);
}
bool operator<=(const MVT &S) const {
  return !(SimpleTy > S.SimpleTy);
}
/// Return true if this is a valid simple valuetype, i.e. SimpleTy lies in
/// the half-open range [FIRST_VALUETYPE, LAST_VALUETYPE).
bool isValid() const {
  if (SimpleTy < MVT::FIRST_VALUETYPE)
    return false;
  return SimpleTy < MVT::LAST_VALUETYPE;
}
/// Return true if this is a FP or a vector FP type.
///
/// Scalar FP types are the range [FIRST_FP_VALUETYPE, LAST_FP_VALUETYPE].
/// Still in patch form: the space-prefixed lines test the single old FP
/// vector range; the '+' lines replace that with two tests, one for the
/// fixed-length FP vector range and one for the scalable FP vector range.
bool isFloatingPoint() const {
return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE &&
SimpleTy <= MVT::LAST_FP_VALUETYPE) ||
 (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE &&
 SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE));
+ (SimpleTy >= MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE));
}
/// Return true if this is an integer or a vector integer type.
///
/// Scalar integers are the range
/// [FIRST_INTEGER_VALUETYPE, LAST_INTEGER_VALUETYPE]. Still in patch form:
/// the space-prefixed lines test the single old integer vector range; the
/// '+' lines replace that with two tests, one for the fixed-length integer
/// vector range and one for the scalable integer vector range.
bool isInteger() const {
return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) ||
 (SimpleTy >= MVT::FIRST_INTEGER_VECTOR_VALUETYPE &&
 SimpleTy <= MVT::LAST_INTEGER_VECTOR_VALUETYPE));
+ (SimpleTy >= MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE));
}
/// Return true if this is a scalar integer type; integer vectors do not
/// qualify. The scalar integers are the enumerators in
/// [FIRST_INTEGER_VALUETYPE, LAST_INTEGER_VALUETYPE].
bool isScalarInteger() const {
  const bool AtOrAboveFirst = SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE;
  const bool AtOrBelowLast = SimpleTy <= MVT::LAST_INTEGER_VALUETYPE;
  return AtOrAboveFirst && AtOrBelowLast;
}
/// Return true if this is a vector value type, fixed-length or scalable:
/// any enumerator in [FIRST_VECTOR_VALUETYPE, LAST_VECTOR_VALUETYPE].
bool isVector() const {
  if (SimpleTy < MVT::FIRST_VECTOR_VALUETYPE)
    return false;
  return SimpleTy <= MVT::LAST_VECTOR_VALUETYPE;
}
/// Return true if this is a vector value type where the
/// runtime length is machine dependent
///
/// Still in patch form: the space-prefixed lines test the old integer and FP
/// scalable ranges separately; the '+' lines replace them with one test of
/// the contiguous [FIRST_SCALABLE_VECTOR_VALUETYPE,
/// LAST_SCALABLE_VECTOR_VALUETYPE] range and add isFixedLengthVector().
bool isScalableVector() const {
 return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE &&
 SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) ||
 (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE &&
 SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE));
+ return (SimpleTy >= MVT::FIRST_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_SCALABLE_VECTOR_VALUETYPE);
+ }
+
+ /// Return true if this is one of the fixed-length (non-nxv) vector types,
+ /// i.e. lies in [FIRST_FIXEDLEN_VECTOR_VALUETYPE, LAST_FIXEDLEN_VECTOR_VALUETYPE].
+ bool isFixedLengthVector() const {
+ return (SimpleTy >= MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
}
/// Return true if this is a 16-bit vector type.
/// Only the fixed-length types listed here match; scalable (nxv) types are
/// not tested.
bool is16BitVector() const {
  return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 ||
          SimpleTy == MVT::v16i1);
}
/// Return true if this is a 32-bit vector type.
/// Only the fixed-length types listed here match; scalable (nxv) types are
/// not tested.
bool is32BitVector() const {
  return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 ||
          SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 ||
          SimpleTy == MVT::v2f16 || SimpleTy == MVT::v1f32);
}
/// Return true if this is a 64-bit vector type.
/// Only the fixed-length types listed here match; scalable (nxv) types are
/// not tested.
bool is64BitVector() const {
  return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 ||
          SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 ||
          SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 ||
          SimpleTy == MVT::v2f32 || SimpleTy == MVT::v1f64);
}
/// Return true if this is a 128-bit vector type.
/// Only the fixed-length types listed here match; scalable (nxv) types are
/// not tested.
bool is128BitVector() const {
  return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 ||
          SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 ||
          SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 ||
          SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 ||
          SimpleTy == MVT::v2f64);
}
/// Return true if this is a 256-bit vector type.
/// Only the fixed-length types listed here match; scalable (nxv) types are
/// not tested.
bool is256BitVector() const {
  return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
          SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 ||
          SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
          SimpleTy == MVT::v4i64);
}
/// Return true if this is a 512-bit vector type.
/// Only the fixed-length types listed here match; scalable (nxv) types are
/// not tested.
bool is512BitVector() const {
  return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
          SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 ||
          SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
          SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
}
/// Return true if this is a 1024-bit vector type.
/// Only the fixed-length integer types listed here match.
/// NOTE(review): v32f32 also has Size 1024 in ValueTypes.td but is not
/// listed here - confirm whether that omission is intentional.
bool is1024BitVector() const {
  return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 ||
          SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 ||
          SimpleTy == MVT::v16i64);
}
/// Return true if this is a 2048-bit vector type.
/// Only the fixed-length integer types listed here match.
/// NOTE(review): v64f32 also has Size 2048 in ValueTypes.td but is not
/// listed here - confirm whether that omission is intentional.
bool is2048BitVector() const {
  return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 ||
          SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64);
}
/// Return true if this is an overloaded type for TableGen, i.e. one of the
/// pseudo types Any, iAny, fAny, vAny or iPTRAny.
bool isOverloaded() const {
  return (SimpleTy == MVT::Any ||
          SimpleTy == MVT::iAny || SimpleTy == MVT::fAny ||
          SimpleTy == MVT::vAny || SimpleTy == MVT::iPTRAny);
}
/// Return the vector VT that keeps this type's element type but has half as
/// many elements. Asserts that the current (minimum) element count is even.
MVT getHalfNumVectorElementsVT() const {
  MVT ElementTy = getVectorElementType();
  auto Count = getVectorElementCount();
  assert(!(Count.Min & 1) && "Splitting vector, but not in half!");
  auto HalfCount = Count / 2;
  return getVectorVT(ElementTy, HalfCount);
}
/// Returns true if the number of elements of this vector type is a power
/// of 2.
bool isPow2VectorType() const {
  unsigned NElts = getVectorNumElements();
  // A power of two has a single bit set, so clearing the lowest set bit
  // (NElts & (NElts - 1)) must leave zero.
  return !(NElts & (NElts - 1));
}
/// Return this type unchanged when its element count is already a power of
/// 2; otherwise return the vector type with the same element type and the
/// element count rounded up to the next power of 2.
MVT getPow2VectorType() const {
  if (!isPow2VectorType()) {
    unsigned NumElts = getVectorNumElements();
    unsigned RoundedUp = 1 << Log2_32_Ceil(NumElts);
    return MVT::getVectorVT(getVectorElementType(), RoundedUp);
  }
  return *this;
}
/// For a vector type return its element type; any other type is returned
/// as-is.
MVT getScalarType() const {
  if (isVector())
    return getVectorElementType();
  return *this;
}
/// Return the element type of this vector type.
///
/// Fixed-length (vN...) and scalable (nxvN...) vectors with the same element
/// type share one case group. Any SimpleTy not listed falls into the default
/// case and reaches llvm_unreachable("Not a vector MVT!").
MVT getVectorElementType() const {
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
// i1 elements
case v1i1:
case v2i1:
case v4i1:
case v8i1:
case v16i1:
case v32i1:
case v64i1:
case v128i1:
case v512i1:
case v1024i1:
case nxv1i1:
case nxv2i1:
case nxv4i1:
case nxv8i1:
case nxv16i1:
case nxv32i1: return i1;
// i8 elements
case v1i8:
case v2i8:
case v4i8:
case v8i8:
case v16i8:
case v32i8:
case v64i8:
case v128i8:
case v256i8:
case nxv1i8:
case nxv2i8:
case nxv4i8:
case nxv8i8:
case nxv16i8:
case nxv32i8: return i8;
// i16 elements
case v1i16:
case v2i16:
case v3i16:
case v4i16:
case v8i16:
case v16i16:
case v32i16:
case v64i16:
case v128i16:
case nxv1i16:
case nxv2i16:
case nxv4i16:
case nxv8i16:
case nxv16i16:
case nxv32i16: return i16;
// i32 elements
case v1i32:
case v2i32:
case v3i32:
case v4i32:
case v5i32:
case v8i32:
case v16i32:
case v32i32:
case v64i32:
case v128i32:
case v256i32:
case v512i32:
case v1024i32:
case v2048i32:
case nxv1i32:
case nxv2i32:
case nxv4i32:
case nxv8i32:
case nxv16i32:
case nxv32i32: return i32;
// i64 elements
case v1i64:
case v2i64:
case v4i64:
case v8i64:
case v16i64:
case v32i64:
case nxv1i64:
case nxv2i64:
case nxv4i64:
case nxv8i64:
case nxv16i64:
case nxv32i64: return i64;
// i128 elements
case v1i128: return i128;
// f16 elements
case v2f16:
case v3f16:
case v4f16:
case v8f16:
case v16f16:
case v32f16:
case nxv2f16:
case nxv4f16:
case nxv8f16: return f16;
// f32 elements
case v1f32:
case v2f32:
case v3f32:
case v4f32:
case v5f32:
case v8f32:
case v16f32:
case v32f32:
case v64f32:
case v128f32:
case v256f32:
case v512f32:
case v1024f32:
case v2048f32:
case nxv1f32:
case nxv2f32:
case nxv4f32:
case nxv8f32:
case nxv16f32: return f32;
// f64 elements
case v1f64:
case v2f64:
case v4f64:
case v8f64:
case nxv1f64:
case nxv2f64:
case nxv4f64:
case nxv8f64: return f64;
}
}
/// Return the element count of this vector type.
///
/// For scalable (nxvN...) types the value returned is the N written in the
/// type name; getVectorElementCount() pairs it with the scalable flag. Case
/// groups are ordered from the largest count (2048) down to 1. Any SimpleTy
/// not listed falls into the default case and reaches
/// llvm_unreachable("Not a vector MVT!").
unsigned getVectorNumElements() const {
switch (SimpleTy) {
default:
llvm_unreachable("Not a vector MVT!");
case v2048i32:
case v2048f32: return 2048;
case v1024i1:
case v1024i32:
case v1024f32: return 1024;
case v512i1:
case v512i32:
case v512f32: return 512;
case v256i8:
case v256i32:
case v256f32: return 256;
case v128i1:
case v128i8:
case v128i16:
case v128i32:
case v128f32: return 128;
case v64i1:
case v64i8:
case v64i16:
case v64i32:
case v64f32: return 64;
case v32i1:
case v32i8:
case v32i16:
case v32i32:
case v32i64:
case v32f16:
case v32f32:
case nxv32i1:
case nxv32i8:
case nxv32i16:
case nxv32i32:
case nxv32i64: return 32;
case v16i1:
case v16i8:
case v16i16:
case v16i32:
case v16i64:
case v16f16:
case v16f32:
case nxv16i1:
case nxv16i8:
case nxv16i16:
case nxv16i32:
case nxv16i64:
case nxv16f32: return 16;
case v8i1:
case v8i8:
case v8i16:
case v8i32:
case v8i64:
case v8f16:
case v8f32:
case v8f64:
case nxv8i1:
case nxv8i8:
case nxv8i16:
case nxv8i32:
case nxv8i64:
case nxv8f16:
case nxv8f32:
case nxv8f64: return 8;
case v5i32:
case v5f32: return 5;
case v4i1:
case v4i8:
case v4i16:
case v4i32:
case v4i64:
case v4f16:
case v4f32:
case v4f64:
case nxv4i1:
case nxv4i8:
case nxv4i16:
case nxv4i32:
case nxv4i64:
case nxv4f16:
case nxv4f32:
case nxv4f64: return 4;
case v3i16:
case v3i32:
case v3f16:
case v3f32: return 3;
case v2i1:
case v2i8:
case v2i16:
case v2i32:
case v2i64:
case v2f16:
case v2f32:
case v2f64:
case nxv2i1:
case nxv2i8:
case nxv2i16:
case nxv2i32:
case nxv2i64:
case nxv2f16:
case nxv2f32:
case nxv2f64: return 2;
case v1i1:
case v1i8:
case v1i16:
case v1i32:
case v1i64:
case v1i128:
case v1f32:
case v1f64:
case nxv1i1:
case nxv1i8:
case nxv1i16:
case nxv1i32:
case nxv1i64:
case nxv1f32:
case nxv1f64: return 1;
}
}
/// Return the element count of this vector type as an ElementCount: the
/// value from getVectorNumElements() paired with the isScalableVector()
/// flag.
ElementCount getVectorElementCount() const {
  unsigned NumElts = getVectorNumElements();
  bool Scalable = isScalableVector();
  return { NumElts, Scalable };
}
/// Return the size of this value type in bits.
///
/// Scalable (nxv*) vector types report the same value as the fixed-length
/// vector with the same element type and count (e.g. both v4i32 and nxv4i32
/// give 128). Types that have no fixed size here (Other, iPTR, the overloaded
/// *Any types, token, Metadata) and any unlisted SimpleTy hit
/// llvm_unreachable; exnref reports 0.
unsigned getSizeInBits() const {
switch (SimpleTy) {
default:
llvm_unreachable("getSizeInBits called on extended MVT.");
case Other:
llvm_unreachable("Value type is nonstandard value, Other.");
case iPTR:
llvm_unreachable("Value type size is targetdependent. Ask TLI.");
case iPTRAny:
case iAny:
case fAny:
case vAny:
case Any:
llvm_unreachable("Value type is overloaded.");
case token:
llvm_unreachable("Token type is a sentinel that cannot be used "
"in codegen and has no size");
case Metadata:
llvm_unreachable("Value type is metadata.");
// From here on, cases are grouped by total bit width, smallest first.
case i1:
case v1i1:
case nxv1i1: return 1;
case v2i1:
case nxv2i1: return 2;
case v4i1:
case nxv4i1: return 4;
case i8 :
case v1i8:
case v8i1:
case nxv1i8:
case nxv8i1: return 8;
case i16 :
case f16:
case v16i1:
case v2i8:
case v1i16:
case nxv16i1:
case nxv2i8:
case nxv1i16: return 16;
case f32 :
case i32 :
case v32i1:
case v4i8:
case v2i16:
case v2f16:
case v1f32:
case v1i32:
case nxv32i1:
case nxv4i8:
case nxv2i16:
case nxv1i32:
case nxv2f16:
case nxv1f32: return 32;
case v3i16:
case v3f16: return 48;
case x86mmx:
case f64 :
case i64 :
case v64i1:
case v8i8:
case v4i16:
case v2i32:
case v1i64:
case v4f16:
case v2f32:
case v1f64:
case nxv8i8:
case nxv4i16:
case nxv2i32:
case nxv1i64:
case nxv4f16:
case nxv2f32:
case nxv1f64: return 64;
case f80 : return 80;
case v3i32:
case v3f32: return 96;
case f128:
case ppcf128:
case i128:
case v128i1:
case v16i8:
case v8i16:
case v4i32:
case v2i64:
case v1i128:
case v8f16:
case v4f32:
case v2f64:
case nxv16i8:
case nxv8i16:
case nxv4i32:
case nxv2i64:
case nxv8f16:
case nxv4f32:
case nxv2f64: return 128;
case v5i32:
case v5f32: return 160;
case v32i8:
case v16i16:
case v8i32:
case v4i64:
case v16f16:
case v8f32:
case v4f64:
case nxv32i8:
case nxv16i16:
case nxv8i32:
case nxv4i64:
case nxv8f32:
case nxv4f64: return 256;
case v512i1:
case v64i8:
case v32i16:
case v16i32:
case v8i64:
case v32f16:
case v16f32:
case v8f64:
case nxv32i16:
case nxv16i32:
case nxv8i64:
case nxv16f32:
case nxv8f64: return 512;
case v1024i1:
case v128i8:
case v64i16:
case v32i32:
case v16i64:
case v32f32:
case nxv32i32:
case nxv16i64: return 1024;
case v256i8:
case v128i16:
case v64i32:
case v32i64:
case v64f32:
case nxv32i64: return 2048;
case v128i32:
case v128f32: return 4096;
case v256i32:
case v256f32: return 8192;
case v512i32:
case v512f32: return 16384;
case v1024i32:
case v1024f32: return 32768;
case v2048i32:
case v2048f32: return 65536;
case exnref: return 0; // opaque type
}
}
/// Return the size in bits of the type produced by getScalarType().
unsigned getScalarSizeInBits() const {
  const auto ScalarVT = getScalarType();
  return ScalarVT.getSizeInBits();
}
/// Return how many bytes a store of this value type overwrites: the size in
/// bits rounded up to a whole number of bytes.
unsigned getStoreSize() const {
  const unsigned Bits = getSizeInBits();
  return (Bits + 7) / 8;
}
/// Return how many bits a store of this value type overwrites, i.e. the
/// store size in bytes expressed in bits.
unsigned getStoreSizeInBits() const {
  const unsigned Bytes = getStoreSize();
  return Bytes * 8;
}
/// Return true if this type is strictly wider (in bits) than VT.
bool bitsGT(MVT VT) const {
  const unsigned ThisBits = getSizeInBits();
  const unsigned OtherBits = VT.getSizeInBits();
  return OtherBits < ThisBits;
}
/// Return true if this type is at least as wide (in bits) as VT.
bool bitsGE(MVT VT) const {
  const unsigned ThisBits = getSizeInBits();
  const unsigned OtherBits = VT.getSizeInBits();
  return !(ThisBits < OtherBits);
}
/// Return true if this type is strictly narrower (in bits) than VT.
bool bitsLT(MVT VT) const {
  const unsigned ThisBits = getSizeInBits();
  const unsigned OtherBits = VT.getSizeInBits();
  return OtherBits > ThisBits;
}
/// Return true if this type is no wider (in bits) than VT.
bool bitsLE(MVT VT) const {
  const unsigned ThisBits = getSizeInBits();
  const unsigned OtherBits = VT.getSizeInBits();
  return !(ThisBits > OtherBits);
}
/// Map a bit width (16, 32, 64, 80 or 128) to the floating-point MVT of that
/// width. Any other width hits llvm_unreachable.
static MVT getFloatingPointVT(unsigned BitWidth) {
  if (BitWidth == 16)
    return MVT::f16;
  if (BitWidth == 32)
    return MVT::f32;
  if (BitWidth == 64)
    return MVT::f64;
  if (BitWidth == 80)
    return MVT::f80;
  if (BitWidth == 128)
    return MVT::f128;
  llvm_unreachable("Bad bit width!");
}
/// Map a bit width (1, 8, 16, 32, 64 or 128) to the integer MVT of that
/// width. Any other width yields INVALID_SIMPLE_VALUE_TYPE.
static MVT getIntegerVT(unsigned BitWidth) {
  if (BitWidth == 1)
    return MVT::i1;
  if (BitWidth == 8)
    return MVT::i8;
  if (BitWidth == 16)
    return MVT::i16;
  if (BitWidth == 32)
    return MVT::i32;
  if (BitWidth == 64)
    return MVT::i64;
  if (BitWidth == 128)
    return MVT::i128;
  // No simple integer type has this width.
  return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
/// Return the fixed-length vector MVT whose element type is VT and whose
/// element count is NumElements.
///
/// Only the (element type, count) pairs listed below are recognised; every
/// other combination, including an element type with no case here, yields
/// INVALID_SIMPLE_VALUE_TYPE.
static MVT getVectorVT(MVT VT, unsigned NumElements) {
switch (VT.SimpleTy) {
default:
break;
case MVT::i1:
if (NumElements == 1) return MVT::v1i1;
if (NumElements == 2) return MVT::v2i1;
if (NumElements == 4) return MVT::v4i1;
if (NumElements == 8) return MVT::v8i1;
if (NumElements == 16) return MVT::v16i1;
if (NumElements == 32) return MVT::v32i1;
if (NumElements == 64) return MVT::v64i1;
if (NumElements == 128) return MVT::v128i1;
if (NumElements == 512) return MVT::v512i1;
if (NumElements == 1024) return MVT::v1024i1;
break;
case MVT::i8:
if (NumElements == 1) return MVT::v1i8;
if (NumElements == 2) return MVT::v2i8;
if (NumElements == 4) return MVT::v4i8;
if (NumElements == 8) return MVT::v8i8;
if (NumElements == 16) return MVT::v16i8;
if (NumElements == 32) return MVT::v32i8;
if (NumElements == 64) return MVT::v64i8;
if (NumElements == 128) return MVT::v128i8;
if (NumElements == 256) return MVT::v256i8;
break;
case MVT::i16:
if (NumElements == 1) return MVT::v1i16;
if (NumElements == 2) return MVT::v2i16;
if (NumElements == 3) return MVT::v3i16;
if (NumElements == 4) return MVT::v4i16;
if (NumElements == 8) return MVT::v8i16;
if (NumElements == 16) return MVT::v16i16;
if (NumElements == 32) return MVT::v32i16;
if (NumElements == 64) return MVT::v64i16;
if (NumElements == 128) return MVT::v128i16;
break;
case MVT::i32:
if (NumElements == 1) return MVT::v1i32;
if (NumElements == 2) return MVT::v2i32;
if (NumElements == 3) return MVT::v3i32;
if (NumElements == 4) return MVT::v4i32;
if (NumElements == 5) return MVT::v5i32;
if (NumElements == 8) return MVT::v8i32;
if (NumElements == 16) return MVT::v16i32;
if (NumElements == 32) return MVT::v32i32;
if (NumElements == 64) return MVT::v64i32;
if (NumElements == 128) return MVT::v128i32;
if (NumElements == 256) return MVT::v256i32;
if (NumElements == 512) return MVT::v512i32;
if (NumElements == 1024) return MVT::v1024i32;
if (NumElements == 2048) return MVT::v2048i32;
break;
case MVT::i64:
if (NumElements == 1) return MVT::v1i64;
if (NumElements == 2) return MVT::v2i64;
if (NumElements == 4) return MVT::v4i64;
if (NumElements == 8) return MVT::v8i64;
if (NumElements == 16) return MVT::v16i64;
if (NumElements == 32) return MVT::v32i64;
break;
case MVT::i128:
if (NumElements == 1) return MVT::v1i128;
break;
case MVT::f16:
// Note: there is no single-element f16 vector in this list.
if (NumElements == 2) return MVT::v2f16;
if (NumElements == 3) return MVT::v3f16;
if (NumElements == 4) return MVT::v4f16;
if (NumElements == 8) return MVT::v8f16;
if (NumElements == 16) return MVT::v16f16;
if (NumElements == 32) return MVT::v32f16;
break;
case MVT::f32:
if (NumElements == 1) return MVT::v1f32;
if (NumElements == 2) return MVT::v2f32;
if (NumElements == 3) return MVT::v3f32;
if (NumElements == 4) return MVT::v4f32;
if (NumElements == 5) return MVT::v5f32;
if (NumElements == 8) return MVT::v8f32;
if (NumElements == 16) return MVT::v16f32;
if (NumElements == 32) return MVT::v32f32;
if (NumElements == 64) return MVT::v64f32;
if (NumElements == 128) return MVT::v128f32;
if (NumElements == 256) return MVT::v256f32;
if (NumElements == 512) return MVT::v512f32;
if (NumElements == 1024) return MVT::v1024f32;
if (NumElements == 2048) return MVT::v2048f32;
break;
case MVT::f64:
if (NumElements == 1) return MVT::v1f64;
if (NumElements == 2) return MVT::v2f64;
if (NumElements == 4) return MVT::v4f64;
if (NumElements == 8) return MVT::v8f64;
break;
}
// Reached when the element type matched but the count did not, or when the
// element type has no case above.
return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
/// Return the scalable (nxv*) vector MVT whose element type is VT and whose
/// element count is NumElements, or INVALID_SIMPLE_VALUE_TYPE when that
/// combination is not one of the listed simple types.
static MVT getScalableVectorVT(MVT VT, unsigned NumElements) {
  switch (VT.SimpleTy) {
  case MVT::i1:
    switch (NumElements) {
    case 1:  return MVT::nxv1i1;
    case 2:  return MVT::nxv2i1;
    case 4:  return MVT::nxv4i1;
    case 8:  return MVT::nxv8i1;
    case 16: return MVT::nxv16i1;
    case 32: return MVT::nxv32i1;
    default: break;
    }
    break;
  case MVT::i8:
    switch (NumElements) {
    case 1:  return MVT::nxv1i8;
    case 2:  return MVT::nxv2i8;
    case 4:  return MVT::nxv4i8;
    case 8:  return MVT::nxv8i8;
    case 16: return MVT::nxv16i8;
    case 32: return MVT::nxv32i8;
    default: break;
    }
    break;
  case MVT::i16:
    switch (NumElements) {
    case 1:  return MVT::nxv1i16;
    case 2:  return MVT::nxv2i16;
    case 4:  return MVT::nxv4i16;
    case 8:  return MVT::nxv8i16;
    case 16: return MVT::nxv16i16;
    case 32: return MVT::nxv32i16;
    default: break;
    }
    break;
  case MVT::i32:
    switch (NumElements) {
    case 1:  return MVT::nxv1i32;
    case 2:  return MVT::nxv2i32;
    case 4:  return MVT::nxv4i32;
    case 8:  return MVT::nxv8i32;
    case 16: return MVT::nxv16i32;
    case 32: return MVT::nxv32i32;
    default: break;
    }
    break;
  case MVT::i64:
    switch (NumElements) {
    case 1:  return MVT::nxv1i64;
    case 2:  return MVT::nxv2i64;
    case 4:  return MVT::nxv4i64;
    case 8:  return MVT::nxv8i64;
    case 16: return MVT::nxv16i64;
    case 32: return MVT::nxv32i64;
    default: break;
    }
    break;
  case MVT::f16:
    switch (NumElements) {
    case 2:  return MVT::nxv2f16;
    case 4:  return MVT::nxv4f16;
    case 8:  return MVT::nxv8f16;
    default: break;
    }
    break;
  case MVT::f32:
    switch (NumElements) {
    case 1:  return MVT::nxv1f32;
    case 2:  return MVT::nxv2f32;
    case 4:  return MVT::nxv4f32;
    case 8:  return MVT::nxv8f32;
    case 16: return MVT::nxv16f32;
    default: break;
    }
    break;
  case MVT::f64:
    switch (NumElements) {
    case 1:  return MVT::nxv1f64;
    case 2:  return MVT::nxv2f64;
    case 4:  return MVT::nxv4f64;
    case 8:  return MVT::nxv8f64;
    default: break;
    }
    break;
  default:
    break;
  }
  // Either the element type or the element count had no match.
  return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
/// Return the vector MVT for element type VT and NumElements elements,
/// choosing the scalable lookup when IsScalable is set and the fixed-length
/// lookup otherwise.
static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) {
  return IsScalable ? getScalableVectorVT(VT, NumElements)
                    : getVectorVT(VT, NumElements);
}
/// Return the vector MVT for element type VT and element count EC: EC.Min
/// supplies the count and EC.Scalable selects the scalable or fixed-length
/// lookup.
static MVT getVectorVT(MVT VT, ElementCount EC) {
  if (!EC.Scalable)
    return getVectorVT(VT, EC.Min);
  return getScalableVectorVT(VT, EC.Min);
}
/// Return the value type corresponding to the specified type. This returns
/// all pointers as iPTR. If HandleUnknown is true, unknown types are
/// returned as Other, otherwise they are invalid.
static MVT getVT(Type *Ty, bool HandleUnknown = false);
private:
/// A simple iterator over the MVT::SimpleValueType enum.
struct mvt_iterator {
SimpleValueType VT;
mvt_iterator(SimpleValueType VT) : VT(VT) {}
MVT operator*() const { return VT; }
bool operator!=(const mvt_iterator &LHS) const { return VT != LHS.VT; }
mvt_iterator& operator++() {
VT = (MVT::SimpleValueType)((int)VT + 1);
assert((int)VT <= MVT::MAX_ALLOWED_VALUETYPE &&
"MVT iterator overflowed.");
return *this;
}
};
/// A range of the MVT::SimpleValueType enum.
using mvt_range = iterator_range<mvt_iterator>;
public:
/// SimpleValueType Iteration
/// @{
/// Range from FIRST_VALUETYPE up to (but not including) LAST_VALUETYPE; the
/// end bound is passed as is, with no +1 applied.
static mvt_range all_valuetypes() {
  const mvt_iterator Begin(MVT::FIRST_VALUETYPE);
  const mvt_iterator End(MVT::LAST_VALUETYPE);
  return mvt_range(Begin, End);
}
/// Range covering FIRST_INTEGER_VALUETYPE through LAST_INTEGER_VALUETYPE
/// inclusive.
static mvt_range integer_valuetypes() {
  const mvt_iterator Begin(MVT::FIRST_INTEGER_VALUETYPE);
  const mvt_iterator End(
      static_cast<MVT::SimpleValueType>(MVT::LAST_INTEGER_VALUETYPE + 1));
  return mvt_range(Begin, End);
}
/// Range covering FIRST_FP_VALUETYPE through LAST_FP_VALUETYPE inclusive.
static mvt_range fp_valuetypes() {
  const mvt_iterator Begin(MVT::FIRST_FP_VALUETYPE);
  const mvt_iterator End(
      static_cast<MVT::SimpleValueType>(MVT::LAST_FP_VALUETYPE + 1));
  return mvt_range(Begin, End);
}
/// Range covering FIRST_VECTOR_VALUETYPE through LAST_VECTOR_VALUETYPE
/// inclusive.
static mvt_range vector_valuetypes() {
  return mvt_range(MVT::FIRST_VECTOR_VALUETYPE,
                   (MVT::SimpleValueType)(MVT::LAST_VECTOR_VALUETYPE + 1));
}

// The accessors below are the resolved (post-patch) form of this section:
// the old and new diff lines were interleaved here, which left duplicated
// function headers and unbalanced braces.

/// Range covering FIRST_FIXEDLEN_VECTOR_VALUETYPE through
/// LAST_FIXEDLEN_VECTOR_VALUETYPE inclusive.
static mvt_range fixedlen_vector_valuetypes() {
  return mvt_range(
      MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
      (MVT::SimpleValueType)(MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE + 1));
}

/// Range covering FIRST_SCALABLE_VECTOR_VALUETYPE through
/// LAST_SCALABLE_VECTOR_VALUETYPE inclusive.
static mvt_range scalable_vector_valuetypes() {
  return mvt_range(
      MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
      (MVT::SimpleValueType)(MVT::LAST_SCALABLE_VECTOR_VALUETYPE + 1));
}

/// Range covering FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE through
/// LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE inclusive.
static mvt_range integer_fixedlen_vector_valuetypes() {
  return mvt_range(
      MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
      (MVT::SimpleValueType)(MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE + 1));
}

/// Range covering FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE through
/// LAST_FP_FIXEDLEN_VECTOR_VALUETYPE inclusive.
static mvt_range fp_fixedlen_vector_valuetypes() {
  return mvt_range(
      MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
      (MVT::SimpleValueType)(MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE + 1));
}

/// Range covering FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE through
/// LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE inclusive.
static mvt_range integer_scalable_vector_valuetypes() {
  return mvt_range(
      MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
      (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE + 1));
}

/// Range covering FIRST_FP_SCALABLE_VECTOR_VALUETYPE through
/// LAST_FP_SCALABLE_VECTOR_VALUETYPE inclusive.
static mvt_range fp_scalable_vector_valuetypes() {
  return mvt_range(
      MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
      (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE + 1));
}
/// @}
};
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINEVALUETYPE_H
diff git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 7c6860eb26c3..0eb10a110421 100644
 a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ 1,1978 +1,1979 @@
//=== TargetLoweringBase.cpp  Implement the TargetLoweringBase class ===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//======//
//
// This implements the TargetLoweringBase class.
//
//======//
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
using namespace llvm;
// NOTE(review): the option-name strings in this group contain no word
// separators (e.g. "jumpisexpensive"); confirm they match the intended
// command-line spellings.

/// Hidden boolean option, default false. Its value is copied into
/// JumpIsExpensive by the TargetLoweringBase constructor.
static cl::opt<bool> JumpIsExpensiveOverride(
"jumpisexpensive", cl::init(false),
cl::desc("Do not create extra branches to split comparison logic."),
cl::Hidden);
/// Hidden option, default 4: minimum number of entries to use a jump table.
static cl::opt<unsigned> MinimumJumpTableEntries
("minjumptableentries", cl::init(4), cl::Hidden,
cl::desc("Set minimum number of entries to use a jump table."));
/// Hidden option, default UINT_MAX: maximum size of jump tables.
static cl::opt<unsigned> MaximumJumpTableSize
("maxjumptablesize", cl::init(UINT_MAX), cl::Hidden,
cl::desc("Set maximum size of jump tables."));
/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
JumpTableDensity("jumptabledensity", cl::init(10), cl::Hidden,
cl::desc("Minimum density for building a jump table in "
"a normal function"));
/// Minimum jump table density for Os or Oz functions.
static cl::opt<unsigned> OptsizeJumpTableDensity(
"optsizejumptabledensity", cl::init(40), cl::Hidden,
cl::desc("Minimum density for building a jump table in "
"an optsize function"));
/// Report whether the given Darwin triple is treated as providing the
/// sincos_stret routines. Must only be called with a Darwin triple.
static bool darwinHasSinCos(const Triple &TT) {
  assert(TT.isOSDarwin() && "should be called with darwin triple");

  // 32-bit x86 is excluded outright.
  if (TT.getArch() == Triple::x86)
    return false;

  // macOS: needs 10.9 or later, and a 64-bit architecture.
  if (TT.isMacOSX()) {
    if (TT.isMacOSXVersionLT(10, 9))
      return false;
    return TT.isArch64Bit();
  }

  // iOS: needs 7.0 or later.
  if (TT.isiOS()) {
    const bool OlderThan7 = TT.isOSVersionLT(7, 0);
    return !OlderThan7;
  }

  // Every other Darwin flavour (e.g. WatchOS/TvOS) is new enough.
  return true;
}
// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than this
// is best represented as control flow. Therefore, the default value N should be
// set such that the win from N% correct executions is greater than the loss
// from (100 - N)% mispredicted executions for the majority of intended targets.
// Hidden option; the default is 99.
static cl::opt<int> MinPercentageForPredictableBranch(
"minpredictablebranch", cl::init(99),
cl::desc("Minimum percentage (0100) that a condition must be either true "
"or false to assume that the condition is predictable"),
cl::Hidden);
/// Populate the libcall name and calling-convention tables for the target
/// described by \p TT.
///
/// Every libcall first receives its default name from RuntimeLibcalls.def and
/// the C calling convention; the blocks that follow then override individual
/// entries for particular architectures, operating systems and environments.
void TargetLoweringBase::InitLibcalls(const Triple &TT) {
#define HANDLE_LIBCALL(code, name) \
  setLibcallName(RTLIB::code, name);
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL

  // Initialize calling conventions to their default.
  for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
    setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);

  // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
  if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
    setLibcallName(RTLIB::ADD_F128, "__addkf3");
    setLibcallName(RTLIB::SUB_F128, "__subkf3");
    setLibcallName(RTLIB::MUL_F128, "__mulkf3");
    setLibcallName(RTLIB::DIV_F128, "__divkf3");
    setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
    setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
    setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
    setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
    setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
    setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
    setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
    setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
    setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
    setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
    setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
    setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
    setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
    setLibcallName(RTLIB::UNE_F128, "__nekf2");
    setLibcallName(RTLIB::OGE_F128, "__gekf2");
    setLibcallName(RTLIB::OLT_F128, "__ltkf2");
    setLibcallName(RTLIB::OLE_F128, "__lekf2");
    setLibcallName(RTLIB::OGT_F128, "__gtkf2");
    setLibcallName(RTLIB::UO_F128, "__unordkf2");
    setLibcallName(RTLIB::O_F128, "__unordkf2");
  }

  // A few names are different on particular architectures or environments.
  if (TT.isOSDarwin()) {
    // For f16/f32 conversions, Darwin uses the standard naming scheme, instead
    // of the gnueabi-style __gnu_*_ieee.
    // FIXME: What about other targets?
    setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

    // Some darwins have an optimized __bzero/bzero function.
    switch (TT.getArch()) {
    case Triple::x86:
    case Triple::x86_64:
      if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
        setLibcallName(RTLIB::BZERO, "__bzero");
      break;
    case Triple::aarch64:
    case Triple::aarch64_32:
      setLibcallName(RTLIB::BZERO, "bzero");
      break;
    default:
      break;
    }

    if (darwinHasSinCos(TT)) {
      setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret");
      setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret");
      // The watch ABI uses a different calling convention for these two.
      if (TT.isWatchABI()) {
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F32,
                              CallingConv::ARM_AAPCS_VFP);
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F64,
                              CallingConv::ARM_AAPCS_VFP);
      }
    }
  } else {
    setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
  }

  // sincos is named only for GNU environments, Fuchsia, and Android
  // version 9 or later.
  if (TT.isGNUEnvironment() || TT.isOSFuchsia() ||
      (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    setLibcallName(RTLIB::SINCOS_F80, "sincosl");
    setLibcallName(RTLIB::SINCOS_F128, "sincosl");
    setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
  }

  if (TT.isPS4CPU()) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
  }

  // OpenBSD gets no name for the stack-protector failure libcall.
  if (TT.isOSOpenBSD()) {
    setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
  }
}
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
  // Dispatch on the destination type first, then on the source type.
  if (RetVT == MVT::f32) {
    if (OpVT == MVT::f16) return FPEXT_F16_F32;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::f64) {
    if (OpVT == MVT::f32) return FPEXT_F32_F64;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::f128) {
    if (OpVT == MVT::f32) return FPEXT_F32_F128;
    if (OpVT == MVT::f64) return FPEXT_F64_F128;
    if (OpVT == MVT::f80) return FPEXT_F80_F128;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::ppcf128) {
    if (OpVT == MVT::f32) return FPEXT_F32_PPCF128;
    if (OpVT == MVT::f64) return FPEXT_F64_PPCF128;
    return UNKNOWN_LIBCALL;
  }
  return UNKNOWN_LIBCALL;
}
/// getFPROUND - Return the FPROUND_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) {
  // Dispatch on the source type first, then on the destination type.
  if (OpVT == MVT::f32) {
    if (RetVT == MVT::f16) return FPROUND_F32_F16;
    return UNKNOWN_LIBCALL;
  }
  if (OpVT == MVT::f64) {
    if (RetVT == MVT::f16) return FPROUND_F64_F16;
    if (RetVT == MVT::f32) return FPROUND_F64_F32;
    return UNKNOWN_LIBCALL;
  }
  if (OpVT == MVT::f80) {
    if (RetVT == MVT::f16) return FPROUND_F80_F16;
    if (RetVT == MVT::f32) return FPROUND_F80_F32;
    if (RetVT == MVT::f64) return FPROUND_F80_F64;
    return UNKNOWN_LIBCALL;
  }
  if (OpVT == MVT::f128) {
    if (RetVT == MVT::f16) return FPROUND_F128_F16;
    if (RetVT == MVT::f32) return FPROUND_F128_F32;
    if (RetVT == MVT::f64) return FPROUND_F128_F64;
    if (RetVT == MVT::f80) return FPROUND_F128_F80;
    return UNKNOWN_LIBCALL;
  }
  if (OpVT == MVT::ppcf128) {
    if (RetVT == MVT::f16) return FPROUND_PPCF128_F16;
    if (RetVT == MVT::f32) return FPROUND_PPCF128_F32;
    if (RetVT == MVT::f64) return FPROUND_PPCF128_F64;
    return UNKNOWN_LIBCALL;
  }
  return UNKNOWN_LIBCALL;
}
/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
  // Dispatch on the integer result type first, then on the FP source type.
  if (RetVT == MVT::i32) {
    if (OpVT == MVT::f32) return FPTOSINT_F32_I32;
    if (OpVT == MVT::f64) return FPTOSINT_F64_I32;
    if (OpVT == MVT::f80) return FPTOSINT_F80_I32;
    if (OpVT == MVT::f128) return FPTOSINT_F128_I32;
    if (OpVT == MVT::ppcf128) return FPTOSINT_PPCF128_I32;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::i64) {
    if (OpVT == MVT::f32) return FPTOSINT_F32_I64;
    if (OpVT == MVT::f64) return FPTOSINT_F64_I64;
    if (OpVT == MVT::f80) return FPTOSINT_F80_I64;
    if (OpVT == MVT::f128) return FPTOSINT_F128_I64;
    if (OpVT == MVT::ppcf128) return FPTOSINT_PPCF128_I64;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::i128) {
    if (OpVT == MVT::f32) return FPTOSINT_F32_I128;
    if (OpVT == MVT::f64) return FPTOSINT_F64_I128;
    if (OpVT == MVT::f80) return FPTOSINT_F80_I128;
    if (OpVT == MVT::f128) return FPTOSINT_F128_I128;
    if (OpVT == MVT::ppcf128) return FPTOSINT_PPCF128_I128;
    return UNKNOWN_LIBCALL;
  }
  return UNKNOWN_LIBCALL;
}
/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
  // Dispatch on the integer result type first, then on the FP source type.
  if (RetVT == MVT::i32) {
    if (OpVT == MVT::f32) return FPTOUINT_F32_I32;
    if (OpVT == MVT::f64) return FPTOUINT_F64_I32;
    if (OpVT == MVT::f80) return FPTOUINT_F80_I32;
    if (OpVT == MVT::f128) return FPTOUINT_F128_I32;
    if (OpVT == MVT::ppcf128) return FPTOUINT_PPCF128_I32;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::i64) {
    if (OpVT == MVT::f32) return FPTOUINT_F32_I64;
    if (OpVT == MVT::f64) return FPTOUINT_F64_I64;
    if (OpVT == MVT::f80) return FPTOUINT_F80_I64;
    if (OpVT == MVT::f128) return FPTOUINT_F128_I64;
    if (OpVT == MVT::ppcf128) return FPTOUINT_PPCF128_I64;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::i128) {
    if (OpVT == MVT::f32) return FPTOUINT_F32_I128;
    if (OpVT == MVT::f64) return FPTOUINT_F64_I128;
    if (OpVT == MVT::f80) return FPTOUINT_F80_I128;
    if (OpVT == MVT::f128) return FPTOUINT_F128_I128;
    if (OpVT == MVT::ppcf128) return FPTOUINT_PPCF128_I128;
    return UNKNOWN_LIBCALL;
  }
  return UNKNOWN_LIBCALL;
}
/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) {
  // Dispatch on the FP result type first, then on the integer source type.
  if (RetVT == MVT::f32) {
    if (OpVT == MVT::i32) return SINTTOFP_I32_F32;
    if (OpVT == MVT::i64) return SINTTOFP_I64_F32;
    if (OpVT == MVT::i128) return SINTTOFP_I128_F32;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::f64) {
    if (OpVT == MVT::i32) return SINTTOFP_I32_F64;
    if (OpVT == MVT::i64) return SINTTOFP_I64_F64;
    if (OpVT == MVT::i128) return SINTTOFP_I128_F64;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::f80) {
    if (OpVT == MVT::i32) return SINTTOFP_I32_F80;
    if (OpVT == MVT::i64) return SINTTOFP_I64_F80;
    if (OpVT == MVT::i128) return SINTTOFP_I128_F80;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::f128) {
    if (OpVT == MVT::i32) return SINTTOFP_I32_F128;
    if (OpVT == MVT::i64) return SINTTOFP_I64_F128;
    if (OpVT == MVT::i128) return SINTTOFP_I128_F128;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::ppcf128) {
    if (OpVT == MVT::i32) return SINTTOFP_I32_PPCF128;
    if (OpVT == MVT::i64) return SINTTOFP_I64_PPCF128;
    if (OpVT == MVT::i128) return SINTTOFP_I128_PPCF128;
    return UNKNOWN_LIBCALL;
  }
  return UNKNOWN_LIBCALL;
}
/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) {
  // Dispatch on the FP result type first, then on the integer source type.
  if (RetVT == MVT::f32) {
    if (OpVT == MVT::i32) return UINTTOFP_I32_F32;
    if (OpVT == MVT::i64) return UINTTOFP_I64_F32;
    if (OpVT == MVT::i128) return UINTTOFP_I128_F32;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::f64) {
    if (OpVT == MVT::i32) return UINTTOFP_I32_F64;
    if (OpVT == MVT::i64) return UINTTOFP_I64_F64;
    if (OpVT == MVT::i128) return UINTTOFP_I128_F64;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::f80) {
    if (OpVT == MVT::i32) return UINTTOFP_I32_F80;
    if (OpVT == MVT::i64) return UINTTOFP_I64_F80;
    if (OpVT == MVT::i128) return UINTTOFP_I128_F80;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::f128) {
    if (OpVT == MVT::i32) return UINTTOFP_I32_F128;
    if (OpVT == MVT::i64) return UINTTOFP_I64_F128;
    if (OpVT == MVT::i128) return UINTTOFP_I128_F128;
    return UNKNOWN_LIBCALL;
  }
  if (RetVT == MVT::ppcf128) {
    if (OpVT == MVT::i32) return UINTTOFP_I32_PPCF128;
    if (OpVT == MVT::i64) return UINTTOFP_I64_PPCF128;
    if (OpVT == MVT::i128) return UINTTOFP_I128_PPCF128;
    return UNKNOWN_LIBCALL;
  }
  return UNKNOWN_LIBCALL;
}
/// Return the SYNC_* libcall for atomic opcode \p Opc operating on type
/// \p VT, or UNKNOWN_LIBCALL when the opcode is not one of the atomic
/// operations listed below or \p VT is not i8, i16, i32, i64 or i128.
RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
// Expands to one `case Name:` of the outer switch whose body selects the
// Enum variant by type: the _1/_2/_4/_8/_16 suffix goes with
// i8/i16/i32/i64/i128. Every path of the inner switch returns, so no case
// falls through into the next one.
#define OP_TO_LIBCALL(Name, Enum) \
case Name: \
switch (VT.SimpleTy) { \
default: \
return UNKNOWN_LIBCALL; \
case MVT::i8: \
return Enum##_1; \
case MVT::i16: \
return Enum##_2; \
case MVT::i32: \
return Enum##_4; \
case MVT::i64: \
return Enum##_8; \
case MVT::i128: \
return Enum##_16; \
}
switch (Opc) {
OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET)
OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN)
OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN)
}
#undef OP_TO_LIBCALL
// Opc matched none of the cases above.
return UNKNOWN_LIBCALL;
}
/// Return the MEMCPY_ELEMENT_UNORDERED_ATOMIC_* libcall for an element size
/// of 1, 2, 4, 8 or 16; any other size yields UNKNOWN_LIBCALL.
RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  if (ElementSize == 1)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
  if (ElementSize == 2)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
  if (ElementSize == 4)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
  if (ElementSize == 8)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
  if (ElementSize == 16)
    return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
  return UNKNOWN_LIBCALL;
}
/// Return the MEMMOVE_ELEMENT_UNORDERED_ATOMIC_* libcall for an element size
/// of 1, 2, 4, 8 or 16; any other size yields UNKNOWN_LIBCALL.
RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  if (ElementSize == 1)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1;
  if (ElementSize == 2)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2;
  if (ElementSize == 4)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4;
  if (ElementSize == 8)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8;
  if (ElementSize == 16)
    return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16;
  return UNKNOWN_LIBCALL;
}
/// Return the MEMSET_ELEMENT_UNORDERED_ATOMIC_* libcall for an element size
/// of 1, 2, 4, 8 or 16; any other size yields UNKNOWN_LIBCALL.
RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
  if (ElementSize == 1)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_1;
  if (ElementSize == 2)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_2;
  if (ElementSize == 4)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_4;
  if (ElementSize == 8)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_8;
  if (ElementSize == 16)
    return MEMSET_ELEMENT_UNORDERED_ATOMIC_16;
  return UNKNOWN_LIBCALL;
}
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
/// \p CCs must point to an array of at least RTLIB::UNKNOWN_LIBCALL entries.
/// Every entry is first set to ISD::SETCC_INVALID, then the entries for the
/// floating-point comparison libcalls receive their condition code.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
  // Fill element-wise. memset() writes a byte pattern, so unless CondCode is
  // a single byte wide the resulting elements would not compare equal to
  // SETCC_INVALID.
  std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);

  CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
  CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
  CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ;
  CCs[RTLIB::UNE_F32] = ISD::SETNE;
  CCs[RTLIB::UNE_F64] = ISD::SETNE;
  CCs[RTLIB::UNE_F128] = ISD::SETNE;
  CCs[RTLIB::UNE_PPCF128] = ISD::SETNE;
  CCs[RTLIB::OGE_F32] = ISD::SETGE;
  CCs[RTLIB::OGE_F64] = ISD::SETGE;
  CCs[RTLIB::OGE_F128] = ISD::SETGE;
  CCs[RTLIB::OGE_PPCF128] = ISD::SETGE;
  CCs[RTLIB::OLT_F32] = ISD::SETLT;
  CCs[RTLIB::OLT_F64] = ISD::SETLT;
  CCs[RTLIB::OLT_F128] = ISD::SETLT;
  CCs[RTLIB::OLT_PPCF128] = ISD::SETLT;
  CCs[RTLIB::OLE_F32] = ISD::SETLE;
  CCs[RTLIB::OLE_F64] = ISD::SETLE;
  CCs[RTLIB::OLE_F128] = ISD::SETLE;
  CCs[RTLIB::OLE_PPCF128] = ISD::SETLE;
  CCs[RTLIB::OGT_F32] = ISD::SETGT;
  CCs[RTLIB::OGT_F64] = ISD::SETGT;
  CCs[RTLIB::OGT_F128] = ISD::SETGT;
  CCs[RTLIB::OGT_PPCF128] = ISD::SETGT;
  // The unordered (UO) and ordered (O) entries use SETNE and SETEQ
  // respectively.
  CCs[RTLIB::UO_F32] = ISD::SETNE;
  CCs[RTLIB::UO_F64] = ISD::SETNE;
  CCs[RTLIB::UO_F128] = ISD::SETNE;
  CCs[RTLIB::UO_PPCF128] = ISD::SETNE;
  CCs[RTLIB::O_F32] = ISD::SETEQ;
  CCs[RTLIB::O_F64] = ISD::SETEQ;
  CCs[RTLIB::O_F128] = ISD::SETEQ;
  CCs[RTLIB::O_PPCF128] = ISD::SETEQ;
}
/// Construct the lowering base for \p tm: reset the action tables, give
/// every tunable its default value, then fill in the libcall name and
/// comparison condition-code tables.
/// NOTE: The TargetMachine owns TLOF.
TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
  initActions();

  // Perform these initializations only once.
  // Limits on stores/loads per memory intrinsic: 8 for the plain variants...
  MaxStoresPerMemset = 8;
  MaxStoresPerMemcpy = 8;
  MaxStoresPerMemmove = 8;
  MaxLoadsPerMemcmp = 8;
  MaxGluedStoresPerMemcpy = 0;
  // ...and 4 for the OptSize variants.
  MaxStoresPerMemsetOptSize = 4;
  MaxStoresPerMemcpyOptSize = 4;
  MaxStoresPerMemmoveOptSize = 4;
  MaxLoadsPerMemcmpOptSize = 4;

  UseUnderscoreSetJmp = false;
  UseUnderscoreLongJmp = false;
  HasMultipleConditionRegisters = false;
  HasExtractBitsInsn = false;
  // Seeded from the command-line override.
  JumpIsExpensive = JumpIsExpensiveOverride;
  PredictableSelectIsExpensive = false;
  EnableExtLdPromotion = false;
  StackPointerRegisterToSaveRestore = 0;

  BooleanContents = UndefinedBooleanContent;
  BooleanFloatContents = UndefinedBooleanContent;
  BooleanVectorContents = UndefinedBooleanContent;

  SchedPreferenceInfo = Sched::ILP;
  GatherAllAliasesMaxDepth = 18;

  // TODO: the default will be switched to 0 in the next commit, along
  // with the Target-specific changes necessary.
  MaxAtomicSizeInBitsSupported = 1024;
  MinCmpXchgSizeInBits = 0;
  SupportsUnalignedAtomics = false;

  // Clear every libcall name before the per-target names are installed.
  for (auto &RoutineName : LibcallRoutineNames)
    RoutineName = nullptr;

  InitLibcalls(TM.getTargetTriple());
  InitCmpLibcallCCs(CmpLibcallCCs);
}
void TargetLoweringBase::initActions() {
// All operations default to being supported.
memset(OpActions, 0, sizeof(OpActions));
memset(LoadExtActions, 0, sizeof(LoadExtActions));
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));
std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);
for (MVT VT : MVT::fp_valuetypes()) {
MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
if (IntVT.isValid()) {
setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
}
}
// Set default actions for various operations.
for (MVT VT : MVT::all_valuetypes()) {
// Default all indexed load / store to expand.
for (unsigned IM = (unsigned)ISD::PRE_INC;
IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) {
setIndexedLoadAction(IM, VT, Expand);
setIndexedStoreAction(IM, VT, Expand);
}
// Most backends expect to see the node which just returns the value loaded.
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, VT, Expand);
setOperationAction(ISD::FMINNUM, VT, Expand);
setOperationAction(ISD::FMAXNUM, VT, Expand);
setOperationAction(ISD::FMINNUM_IEEE, VT, Expand);
setOperationAction(ISD::FMAXNUM_IEEE, VT, Expand);
setOperationAction(ISD::FMINIMUM, VT, Expand);
setOperationAction(ISD::FMAXIMUM, VT, Expand);
setOperationAction(ISD::FMAD, VT, Expand);
setOperationAction(ISD::SMIN, VT, Expand);
setOperationAction(ISD::SMAX, VT, Expand);
setOperationAction(ISD::UMIN, VT, Expand);
setOperationAction(ISD::UMAX, VT, Expand);
setOperationAction(ISD::ABS, VT, Expand);
setOperationAction(ISD::FSHL, VT, Expand);
setOperationAction(ISD::FSHR, VT, Expand);
setOperationAction(ISD::SADDSAT, VT, Expand);
setOperationAction(ISD::UADDSAT, VT, Expand);
setOperationAction(ISD::SSUBSAT, VT, Expand);
setOperationAction(ISD::USUBSAT, VT, Expand);
setOperationAction(ISD::SMULFIX, VT, Expand);
setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
setOperationAction(ISD::UMULFIXSAT, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
setOperationAction(ISD::SSUBO, VT, Expand);
setOperationAction(ISD::UADDO, VT, Expand);
setOperationAction(ISD::USUBO, VT, Expand);
setOperationAction(ISD::SMULO, VT, Expand);
setOperationAction(ISD::UMULO, VT, Expand);
// ADDCARRY operations default to expand
setOperationAction(ISD::ADDCARRY, VT, Expand);
setOperationAction(ISD::SUBCARRY, VT, Expand);
setOperationAction(ISD::SETCCCARRY, VT, Expand);
// ADDC/ADDE/SUBC/SUBE default to expand.
setOperationAction(ISD::ADDC, VT, Expand);
setOperationAction(ISD::ADDE, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::SUBE, VT, Expand);
// These default to Expand so they will be expanded to CTLZ/CTTZ by default.
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::BITREVERSE, VT, Expand);
// These library functions default to expand.
setOperationAction(ISD::FROUND, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
// These operations default to expand for vector types.
if (VT.isVector()) {
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
// Constrained floatingpoint operations default to expand.
setOperationAction(ISD::STRICT_FADD, VT, Expand);
setOperationAction(ISD::STRICT_FSUB, VT, Expand);
setOperationAction(ISD::STRICT_FMUL, VT, Expand);
setOperationAction(ISD::STRICT_FDIV, VT, Expand);
setOperationAction(ISD::STRICT_FREM, VT, Expand);
setOperationAction(ISD::STRICT_FMA, VT, Expand);
setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
setOperationAction(ISD::STRICT_FPOW, VT, Expand);
setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
setOperationAction(ISD::STRICT_FSIN, VT, Expand);
setOperationAction(ISD::STRICT_FCOS, VT, Expand);
setOperationAction(ISD::STRICT_FEXP, VT, Expand);
setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
setOperationAction(ISD::STRICT_FLOG, VT, Expand);
setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
setOperationAction(ISD::STRICT_FRINT, VT, Expand);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
setOperationAction(ISD::STRICT_FROUND, VT, Expand);
setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand);
setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand);
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
// Vector reduction default to expand.
setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
// Most targets also ignore the @llvm.readcyclecounter intrinsic.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
setOperationAction(ISD::ConstantFP, MVT::f128, Expand);
// These library functions default to expand.
for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
setOperationAction(ISD::FCBRT, VT, Expand);
setOperationAction(ISD::FLOG , VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP , VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FROUND, VT, Expand);
setOperationAction(ISD::LROUND, VT, Expand);
setOperationAction(ISD::LLROUND, VT, Expand);
setOperationAction(ISD::LRINT, VT, Expand);
setOperationAction(ISD::LLRINT, VT, Expand);
}
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
// On most systems, DEBUGTRAP and TRAP have no difference. The "Expand"
// here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP.
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
}
/// Default scalar shift-amount type: an integer as wide as a pointer in
/// address space 0. The shifted value's type (second parameter) is ignored.
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
                                               EVT) const {
  const unsigned PtrBits = DL.getPointerSizeInBits(0);
  return MVT::getIntegerVT(PtrBits);
}
/// Return the type to use for the shift amount when shifting a value of type
/// \p LHSTy. Vectors use their own type; scalars use the target's scalar
/// shift-amount type when \p LegalTypes is set and the pointer type otherwise.
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
                                         bool LegalTypes) const {
  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");

  // A vector shift takes a per-element amount of the same vector type.
  if (LHSTy.isVector())
    return LHSTy;

  if (LegalTypes)
    return getScalarShiftAmountTy(DL, LHSTy);
  return getPointerTy(DL);
}
/// Return true if operation \p Op may trap by default. Only the integer
/// division and remainder opcodes are reported as trapping. \p VT must be a
/// legal type.
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
  assert(isTypeLegal(VT));
  const bool IsDivOrRem = Op == ISD::SDIV || Op == ISD::UDIV ||
                          Op == ISD::SREM || Op == ISD::UREM;
  return IsDivOrRem;
}
/// Record whether jumps are expensive on this target, unless the user already
/// decided via the command-line option.
void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) {
  // If the command-line option was specified, ignore this request.
  if (JumpIsExpensiveOverride.getNumOccurrences())
    return;
  JumpIsExpensive = isExpensive;
}
/// Compute the next type-legalization step for \p VT: the action to apply
/// paired with the type that action produces.
///
/// Simple types are answered from the tables filled in by
/// computeRegisterProperties(). Extended scalar integers are promoted to a
/// rounded size or expanded to half width. Extended vectors are scalarized,
/// widened, element-promoted or split as described inline below.
TargetLoweringBase::LegalizeKind
TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
  // If this is a simple type, use the ComputeRegisterProp mechanism.
  if (VT.isSimple()) {
    MVT SVT = VT.getSimpleVT();
    assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
    MVT NVT = TransformToType[SVT.SimpleTy];
    LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);

    assert((LA == TypeLegal || LA == TypeSoftenFloat ||
            (NVT.isVector() ||
             ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
           "Promote may not follow Expand or Promote");

    if (LA == TypeSplitVector)
      return LegalizeKind(LA,
                          EVT::getVectorVT(Context, SVT.getVectorElementType(),
                                           SVT.getVectorNumElements() / 2));
    if (LA == TypeScalarizeVector)
      return LegalizeKind(LA, SVT.getVectorElementType());
    return LegalizeKind(LA, NVT);
  }

  // Handle Extended Scalar Types.
  if (!VT.isVector()) {
    assert(VT.isInteger() && "Float types must be simple");
    unsigned BitSize = VT.getSizeInBits();
    // First promote to a power-of-two size, then expand if necessary.
    if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
      EVT NVT = VT.getRoundIntegerType(Context);
      assert(NVT != VT && "Unable to round integer VT");
      LegalizeKind NextStep = getTypeConversion(Context, NVT);
      // Avoid multi-step promotion.
      if (NextStep.first == TypePromoteInteger)
        return NextStep;
      // Return rounded integer type.
      return LegalizeKind(TypePromoteInteger, NVT);
    }

    return LegalizeKind(TypeExpandInteger,
                        EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
  }

  // Handle vector types.
  unsigned NumElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();

  // Vectors with only one element are always scalarized.
  if (NumElts == 1)
    return LegalizeKind(TypeScalarizeVector, EltVT);

  // Try to widen vector elements until the element type is a power of two and
  // promote it to a legal type later on, for example:
  // <3 x i8> -> <4 x i8> -> <4 x i32>
  if (EltVT.isInteger()) {
    // Vectors with a number of elements that is not a power of two are always
    // widened, for example <3 x i8> -> <4 x i8>.
    if (!VT.isPow2VectorType()) {
      NumElts = (unsigned)NextPowerOf2(NumElts);
      EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
      return LegalizeKind(TypeWidenVector, NVT);
    }

    // Examine the element type.
    LegalizeKind LK = getTypeConversion(Context, EltVT);

    // If type is to be expanded, split the vector.
    //  <4 x i140> -> <2 x i140>
    if (LK.first == TypeExpandInteger)
      return LegalizeKind(TypeSplitVector,
                          EVT::getVectorVT(Context, EltVT, NumElts / 2));

    // Promote the integer element types until a legal vector type is found
    // or until the element integer type is too big. If a legal type was not
    // found, fallback to the usual mechanism of widening/splitting the
    // vector.
    EVT OldEltVT = EltVT;
    while (true) {
      // Increase the bitwidth of the element to the next pow-of-two
      // (which is greater than 8 bits).
      EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
                  .getRoundIntegerType(Context);

      // Stop trying when getting a non-simple element type.
      // Note that vector elements may be greater than legal vector element
      // types. Example: X86 XMM registers hold 64bit element on 32bit
      // systems.
      if (!EltVT.isSimple())
        break;

      // Build a new vector type and check if it is legal.
      MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
      // Found a legal promoted vector type.
      if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
        return LegalizeKind(TypePromoteInteger,
                            EVT::getVectorVT(Context, EltVT, NumElts));
    }

    // Reset the type to the unexpanded type if we did not find a legal vector
    // type with a promoted vector element type.
    EltVT = OldEltVT;
  }

  // Try to widen the vector until a legal type is found.
  // If there is no wider legal type, split the vector.
  while (true) {
    // Round up to the next power of 2.
    NumElts = (unsigned)NextPowerOf2(NumElts);

    // If there is no simple vector type with this many elements then there
    // cannot be a larger legal vector type.  Note that this assumes that
    // there are no skipped intermediate vector types in the simple types.
    if (!EltVT.isSimple())
      break;
    MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
    if (LargerVector == MVT())
      break;

    // If this type is legal then widen the vector.
    if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
      return LegalizeKind(TypeWidenVector, LargerVector);
  }

  // Widen odd vectors to next power of two.
  if (!VT.isPow2VectorType()) {
    EVT NVT = VT.getPow2VectorType(Context);
    return LegalizeKind(TypeWidenVector, NVT);
  }

  // Vectors with illegal element types are expanded.
  EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2);
  return LegalizeKind(TypeSplitVector, NVT);
}
/// Break the simple vector type \p VT down into pieces the target can hold.
///
/// \param VT               Vector type to decompose.
/// \param IntermediateVT   [out] Type of each piece before promotion or
///                         expansion.
/// \param NumIntermediates [out] Number of such pieces.
/// \param RegisterVT       [out] Register type that holds each piece.
/// \param TLI              Lowering object queried for type legality.
/// \returns the total number of registers needed to hold \p VT.
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
                                          unsigned &NumIntermediates,
                                          MVT &RegisterVT,
                                          TargetLoweringBase *TLI) {
  // Figure out the right, legal destination reg to copy into.
  unsigned NumElts = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  // Fall back to the bare element type when no vector of it is legal.
  MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
  if (!TLI->isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  unsigned NewVTSize = NewVT.getSizeInBits();

  // Convert sizes such as i33 to i64.
  if (!isPowerOf2_32(NewVTSize))
    NewVTSize = NextPowerOf2(NewVTSize);

  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT))    // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
/// isLegalRC - Return true if at least one of the value types that can be
/// represented by the specified register class is legal.
bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI,
                                   const TargetRegisterClass &RC) const {
  // The type list of a register class is terminated by MVT::Other.
  auto TypeIt = TRI.legalclasstypes_begin(RC);
  while (*TypeIt != MVT::Other) {
    if (isTypeLegal(*TypeIt))
      return true;
    ++TypeIt;
  }
  return false;
}
/// Replace/modify any TargetFrameIndex operands with a target-dependent
/// sequence of memory operands that is recognized by PrologEpilogInserter.
///
/// Each frame-index operand of \p InitialMI is rewritten by building a new
/// instruction with the expanded operand sequence, inserting it before the
/// old one and erasing the old one. Returns \p MBB.
MachineBasicBlock *
TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
                                   MachineBasicBlock *MBB) const {
  MachineInstr *MI = &InitialMI;
  MachineFunction &MF = *MI->getMF();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // We're handling multiple types of operands here:
  // PATCHPOINT MetaArgs - live-in, read only, direct
  // STATEPOINT Deopt Spill - live-through, read only, indirect
  // STATEPOINT Deopt Alloca - live-through, read only, direct
  // (We're currently conservative and mark the deopt slots read/write in
  // practice.)
  // STATEPOINT GC Spill - live-through, read/write, indirect
  // STATEPOINT GC Alloca - live-through, read/write, direct
  // The live-in vs live-through is handled already (the live through ones are
  // all stack slots), but we need to handle the different type of stackmap
  // operands and memory effects here.

  // MI changes inside this loop as we grow operands.
  for (unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) {
    MachineOperand &MO = MI->getOperand(OperIdx);
    if (!MO.isFI())
      continue;

    // foldMemoryOperand builds a new MI after replacing a single FI operand
    // with the canonical set of five x86 addressing-mode operands.
    int FI = MO.getIndex();
    MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());

    // Copy operands before the frame-index.
    for (unsigned i = 0; i < OperIdx; ++i)
      MIB.add(MI->getOperand(i));
    // Add frame index operands recognized by stackmaps.cpp
    if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
      // indirect-mem-ref tag, size, #FI, offset.
      // Used for spills inserted by StatepointLowering.  This codepath is not
      // used for patchpoints/stackmaps at all, for these spilling is done via
      // foldMemoryOperand callback only.
      assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
      MIB.addImm(StackMaps::IndirectMemRefOp);
      MIB.addImm(MFI.getObjectSize(FI));
      MIB.add(MI->getOperand(OperIdx));
      MIB.addImm(0);
    } else {
      // direct-mem-ref tag, #FI, offset.
      // Used by patchpoint, and direct alloca arguments to statepoints
      MIB.addImm(StackMaps::DirectMemRefOp);
      MIB.add(MI->getOperand(OperIdx));
      MIB.addImm(0);
    }
    // Copy the operands after the frame index.
    for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i)
      MIB.add(MI->getOperand(i));

    // Inherit previous memory operands.
    MIB.cloneMemRefs(*MI);
    assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");

    // Add a new memory operand for this FI.
    assert(MFI.getObjectOffset(FI) != -1);

    // Note: STATEPOINT MMOs are added during SelectionDAG.  STACKMAP, and
    // PATCHPOINT should be updated to do the same. (TODO)
    if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
      auto Flags = MachineMemOperand::MOLoad;
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FI), Flags,
          MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
      MIB->addMemOperand(MF, MMO);
    }

    // Replace the instruction and update the operand index.
    MBB->insert(MachineBasicBlock::iterator(MI), MIB);
    // Skip the operands that replaced the frame index; the loop's ++OperIdx
    // then steps onto the first operand after them.
    OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1;
    MI->eraseFromParent();
    MI = MIB;
  }
  return MBB;
}
/// Rebuild the PATCHABLE_EVENT_CALL instruction \p MI as a fresh instruction
/// with the same descriptor, debug location and operands, insert it before
/// \p MI in \p MBB, and erase \p MI. Returns \p MBB.
MachineBasicBlock *
TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const {
  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL &&
         "Called emitXRayCustomEvent on the wrong MI!");
  auto &MF = *MI.getMF();
  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
    MIB.add(MI.getOperand(OpIdx));

  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
  MI.eraseFromParent();
  return MBB;
}
/// Rebuild the PATCHABLE_TYPED_EVENT_CALL instruction \p MI as a fresh
/// instruction with the same descriptor, debug location and operands, insert
/// it before \p MI in \p MBB, and erase \p MI. Returns \p MBB.
MachineBasicBlock *
TargetLoweringBase::emitXRayTypedEvent(MachineInstr &MI,
                                       MachineBasicBlock *MBB) const {
  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_TYPED_EVENT_CALL &&
         "Called emitXRayTypedEvent on the wrong MI!");
  auto &MF = *MI.getMF();
  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
    MIB.add(MI.getOperand(OpIdx));

  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
  MI.eraseFromParent();
  return MBB;
}
/// findRepresentativeClass  Return the largest legal superreg register class
/// of the register class for the specified type and its associated "cost".
// This function is in TargetLowering because it uses RegClassForVT which would
// need to be moved to TargetRegisterInfo and would necessitate moving
// isTypeLegal over as well  a massive change that would just require
// TargetLowering having a TargetRegisterInfo class member that it would use.
std::pair<const TargetRegisterClass *, uint8_t>
TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
if (!RC)
return std::make_pair(RC, 0);
// Compute the set of all superregister classes.
BitVector SuperRegRC(TRI>getNumRegClasses());
for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI)
SuperRegRC.setBitsInMask(RCI.getMask());
// Find the first legal register class with the largest spill size.
const TargetRegisterClass *BestRC = RC;
for (unsigned i : SuperRegRC.set_bits()) {
const TargetRegisterClass *SuperRC = TRI>getRegClass(i);
// We want the largest possible spill size.
if (TRI>getSpillSize(*SuperRC) <= TRI>getSpillSize(*BestRC))
continue;
if (!isLegalRC(*TRI, *SuperRC))
continue;
BestRC = SuperRC;
}
return std::make_pair(BestRC, 1);
}
/// computeRegisterProperties  Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLoweringBase::computeRegisterProperties(
const TargetRegisterInfo *TRI) {
static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE,
"Too many value types for ValueTypeActions to hold!");
// Everything defaults to needing one register.
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
NumRegistersForVT[i] = 1;
RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i;
}
// ...except isVoid, which doesn't need any registers.
NumRegistersForVT[MVT::isVoid] = 0;
// Find the largest integer register class.
unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
for (; RegClassForVT[LargestIntReg] == nullptr; LargestIntReg)
assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
// Every integer value type larger than this largest register takes twice as
// many registers to represent as the previous ValueType.
for (unsigned ExpandedReg = LargestIntReg + 1;
ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) {
NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg1];
RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg;
TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg  1);
ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg,
TypeExpandInteger);
}
// Inspect all of the ValueType's smaller than the largest integer
// register to see which ones need promotion.
unsigned LegalIntReg = LargestIntReg;
for (unsigned IntReg = LargestIntReg  1;
IntReg >= (unsigned)MVT::i1; IntReg) {
MVT IVT = (MVT::SimpleValueType)IntReg;
if (isTypeLegal(IVT)) {
LegalIntReg = IntReg;
} else {
RegisterTypeForVT[IntReg] = TransformToType[IntReg] =
(MVT::SimpleValueType)LegalIntReg;
ValueTypeActions.setTypeAction(IVT, TypePromoteInteger);
}
}
// ppcf128 type is really two f64's.
if (!isTypeLegal(MVT::ppcf128)) {
if (isTypeLegal(MVT::f64)) {
NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64];
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
TransformToType[MVT::ppcf128] = MVT::f64;
ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat);
} else {
NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128];
RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128];
TransformToType[MVT::ppcf128] = MVT::i128;
ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat);
}
}
// Decide how to handle f128. If the target does not have native f128 support,
// expand it to i128 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f128)) {
NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128];
RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128];
TransformToType[MVT::f128] = MVT::i128;
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
}
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f64)) {
NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64];
RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64];
TransformToType[MVT::f64] = MVT::i64;
ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat);
}
// Decide how to handle f32. If the target does not have native f32 support,
// expand it to i32 and we will be generating soft float library calls.
if (!isTypeLegal(MVT::f32)) {
NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32];
RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32];
TransformToType[MVT::f32] = MVT::i32;
ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat);
}
// Decide how to handle f16. If the target does not have native f16 support,
// promote it to f32, because there are no f16 library calls (except for
// conversions).
if (!isTypeLegal(MVT::f16)) {
NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32];
RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32];
TransformToType[MVT::f16] = MVT::f32;
ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat);
}
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType) i;
if (isTypeLegal(VT))
continue;
MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
bool IsLegalWiderType = false;
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
switch (PreferredAction) {
case TypePromoteInteger:
// Try to promote the elements of integer vectors. If no legal
// promotion was found, fall through to the widenvector method.
 for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
+ for (unsigned nVT = i + 1;
+ nVT <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
ValueTypeActions.setTypeAction(VT, TypePromoteInteger);
IsLegalWiderType = true;
break;
}
}
if (IsLegalWiderType)
break;
LLVM_FALLTHROUGH;
case TypeWidenVector:
if (isPowerOf2_32(NElts)) {
// Try to widen the vector.
for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
if (SVT.getVectorElementType() == EltVT
&& SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
IsLegalWiderType = true;
break;
}
}
if (IsLegalWiderType)
break;
} else {
// Only widen to the next power of 2 to keep consistency with EVT.
MVT NVT = VT.getPow2VectorType();
if (isTypeLegal(NVT)) {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
RegisterTypeForVT[i] = NVT;
NumRegistersForVT[i] = 1;
break;
}
}
LLVM_FALLTHROUGH;
case TypeSplitVector:
case TypeScalarizeVector: {
MVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT,
NumIntermediates, RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
MVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
TransformToType[i] = MVT::Other;
if (PreferredAction == TypeScalarizeVector)
ValueTypeActions.setTypeAction(VT, TypeScalarizeVector);
else if (PreferredAction == TypeSplitVector)
ValueTypeActions.setTypeAction(VT, TypeSplitVector);
else
// Set type action according to the number of elements.
ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector
: TypeSplitVector);
} else {
TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
}
break;
}
default:
llvm_unreachable("Unknown vector legalization action!");
}
}
// Determine the 'representative' register class for each value type.
// An representative register class is the largest (meaning one which is
// not a subregister class / subreg register class) legal register class for
// a group of value types. For example, on i386, i8, i16, and i32
// representative would be GR32; while on x86_64 it's GR64.
for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
const TargetRegisterClass* RRC;
uint8_t Cost;
std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i);
RepRegClassForVT[i] = RRC;
RepRegClassCostForVT[i] = Cost;
}
}
/// Default result type of a scalar SETCC: the pointer type for \p DL.
/// Vector types have no default and must not be passed.
EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  assert(!VT.isVector() && "No default SetCC type for vectors!");
  MVT PtrVT = getPointerTy(DL);
  return PtrVT.SimpleTy;
}
/// Return the value type produced by comparison libcalls; i32 by default.
MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
  const MVT::SimpleValueType DefaultTy = MVT::i32; // the default value
  return DefaultTy;
}
/// getVectorTypeBreakdown - Vector types are broken down into some number of
/// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86.
///
/// This method returns the number of registers needed, and the VT for each
/// register.  It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                                EVT &IntermediateVT,
                                                unsigned &NumIntermediates,
                                                MVT &RegisterVT) const {
  unsigned NumElts = VT.getVectorNumElements();

  // If there is a wider vector type with the same element type as this one,
  // or a promoted vector type that has the same number of elements which
  // are wider, then we should convert to that legal vector type.
  // This handles things like <2 x float> -> <4 x float> and
  // <4 x i1> -> <4 x i32>.
  LegalizeTypeAction TA = getTypeAction(Context, VT);
  if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) {
    EVT RegisterEVT = getTypeToTransformTo(Context, VT);
    if (isTypeLegal(RegisterEVT)) {
      IntermediateVT = RegisterEVT;
      RegisterVT = RegisterEVT.getSimpleVT();
      NumIntermediates = 1;
      return 1;
    }
  }

  // Figure out the right, legal destination reg to copy into.
  EVT EltTy = VT.getVectorElementType();

  unsigned NumVectorRegs = 1;

  // FIXME: We don't support non-power-of-2-sized vectors for now.  Ideally we
  // could break down into LHS/RHS like LegalizeDAG does.
  if (!isPowerOf2_32(NumElts)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }

  // Divide the input until we get to a supported size.  This will always
  // end with a scalar if the target doesn't support vectors.
  while (NumElts > 1 && !isTypeLegal(
                                   EVT::getVectorVT(Context, EltTy, NumElts))) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }

  NumIntermediates = NumVectorRegs;

  // Fall back to the bare element type when no vector of it is legal.
  EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
  if (!isTypeLegal(NewVT))
    NewVT = EltTy;
  IntermediateVT = NewVT;

  MVT DestVT = getRegisterType(Context, NewVT);
  RegisterVT = DestVT;
  unsigned NewVTSize = NewVT.getSizeInBits();

  // Convert sizes such as i33 to i64.
  if (!isPowerOf2_32(NewVTSize))
    NewVTSize = NextPowerOf2(NewVTSize);

  if (EVT(DestVT).bitsLT(NewVT))   // Value is expanded, e.g. i64 -> i16.
    return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());

  // Otherwise, promotion or legal types use the same number of registers as
  // the vector decimated to the appropriate level.
  return NumVectorRegs;
}
/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function.  This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
///
/// One ISD::OutputArg is appended to \p Outs for every register part of every
/// value in \p ReturnType; nothing is appended when the type has no values.
/// TODO: Move this out of TargetLowering.cpp.
void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
                         AttributeList attr,
                         SmallVectorImpl<ISD::OutputArg> &Outs,
                         const TargetLowering &TLI, const DataLayout &DL) {
  SmallVector<EVT, 4> ValueVTs;
  ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
  unsigned NumValues = ValueVTs.size();
  if (NumValues == 0) return;

  for (unsigned j = 0, f = NumValues; j != f; ++j) {
    EVT VT = ValueVTs[j];
    ISD::NodeType ExtendKind = ISD::ANY_EXTEND;

    if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
      ExtendKind = ISD::SIGN_EXTEND;
    else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
      ExtendKind = ISD::ZERO_EXTEND;

    // FIXME: C calling convention requires the return type to be promoted to
    // at least 32-bit. But this is not necessary for non-C calling
    // conventions. The frontend should mark functions whose return values
    // require promoting with signext or zeroext attributes.
    if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
      MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
      if (VT.bitsLT(MinVT))
        VT = MinVT;
    }

    unsigned NumParts =
        TLI.getNumRegistersForCallingConv(ReturnType->getContext(), CC, VT);
    MVT PartVT =
        TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), CC, VT);

    // 'inreg' on function refers to return value
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
    if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
      Flags.setInReg();

    // Propagate extension type if any
    if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
      Flags.setSExt();
    else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
      Flags.setZExt();

    for (unsigned i = 0; i < NumParts; ++i)
      Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
  }
}
/// Return the desired alignment for ByVal aggregate function arguments in the
/// caller parameter area. The result is the alignment itself, not its
/// logarithm.
unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
                                                   const DataLayout &DL) const {
  // This implementation simply defers to the data layout's ABI alignment.
  const unsigned ABIAlignment = DL.getABITypeAlignment(Ty);
  return ABIAlignment;
}
/// Decide whether a memory access of type \p VT with the given alignment is
/// allowed. If \p Fast is non-null it is set to true when the access meets the
/// ABI alignment; otherwise it is forwarded to the misaligned-access query.
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, EVT VT,
                                            unsigned AddrSpace,
                                            unsigned Alignment,
                                            MachineMemOperand::Flags Flags,
                                            bool *Fast) const {
  // Check if the specified alignment is sufficient based on the data layout.
  // TODO: While using the data layout works in practice, a better solution
  // would be to implement this check directly (make this a virtual function).
  // For example, the ABI alignment may change based on software platform while
  // this function should only be affected by hardware implementation.
  Type *AccessTy = VT.getTypeForEVT(Context);
  const bool MeetsABIAlignment =
      Alignment >= DL.getABITypeAlignment(AccessTy);

  // Under-aligned: let the misaligned-access query decide.
  if (!MeetsABIAlignment)
    return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
                                          Fast);

  // Assume that an access that meets the ABI-specified alignment is fast.
  if (Fast)
    *Fast = true;
  return true;
}
/// Convenience overload: unpack the address space, alignment and flags from
/// \p MMO and forward to the explicit-argument overload.
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
                                            const DataLayout &DL, EVT VT,
                                            const MachineMemOperand &MMO,
                                            bool *Fast) const {
  const auto AddrSpace = MMO.getAddrSpace();
  const auto Alignment = MMO.getAlignment();
  const auto Flags = MMO.getFlags();
  return allowsMemoryAccess(Context, DL, VT, AddrSpace, Alignment, Flags,
                            Fast);
}
/// Return the branch-probability threshold built from
/// MinPercentageForPredictableBranch (defined outside this excerpt) over a
/// denominator of 100.
BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
  const BranchProbability Threshold(MinPercentageForPredictableBranch, 100);
  return Threshold;
}
//======//
// TargetTransformInfo Helpers
//======//
/// Translate an IR instruction opcode into the ISD opcode this file associates
/// with it. Returns 0 for instructions that have no ISD opcode listed here.
/// Opcodes outside the enumerated set hit llvm_unreachable.
int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
  // Build a local enum of IR opcodes from Instruction.def so the switch below
  // can name each instruction directly.
  enum InstructionOpcodes {
#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM,
#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM
#include "llvm/IR/Instruction.def"
  };

  switch (static_cast<InstructionOpcodes>(Opcode)) {
  // Instructions with no ISD opcode: terminators, EH pads, memory/atomic
  // bookkeeping, calls, PHIs and the user-defined slots.
  case Ret:
  case Br:
  case Switch:
  case IndirectBr:
  case Invoke:
  case CallBr:
  case Resume:
  case Unreachable:
  case CleanupRet:
  case CatchRet:
  case CatchPad:
  case CatchSwitch:
  case CleanupPad:
  case Alloca:
  case GetElementPtr:
  case Fence:
  case AtomicCmpXchg:
  case AtomicRMW:
  case PHI:
  case Call:
  case UserOp1:
  case UserOp2:
  case VAArg:
  case LandingPad:
    return 0;

  // Unary and binary arithmetic.
  case FNeg:
    return ISD::FNEG;
  case Add:
    return ISD::ADD;
  case FAdd:
    return ISD::FADD;
  case Sub:
    return ISD::SUB;
  case FSub:
    return ISD::FSUB;
  case Mul:
    return ISD::MUL;
  case FMul:
    return ISD::FMUL;
  case UDiv:
    return ISD::UDIV;
  case SDiv:
    return ISD::SDIV;
  case FDiv:
    return ISD::FDIV;
  case URem:
    return ISD::UREM;
  case SRem:
    return ISD::SREM;
  case FRem:
    return ISD::FREM;

  // Shifts and bitwise logic.
  case Shl:
    return ISD::SHL;
  case LShr:
    return ISD::SRL;
  case AShr:
    return ISD::SRA;
  case And:
    return ISD::AND;
  case Or:
    return ISD::OR;
  case Xor:
    return ISD::XOR;

  // Memory.
  case Load:
    return ISD::LOAD;
  case Store:
    return ISD::STORE;

  // Conversions and casts.
  case Trunc:
    return ISD::TRUNCATE;
  case ZExt:
    return ISD::ZERO_EXTEND;
  case SExt:
    return ISD::SIGN_EXTEND;
  case FPToUI:
    return ISD::FP_TO_UINT;
  case FPToSI:
    return ISD::FP_TO_SINT;
  case UIToFP:
    return ISD::UINT_TO_FP;
  case SIToFP:
    return ISD::SINT_TO_FP;
  case FPTrunc:
    return ISD::FP_ROUND;
  case FPExt:
    return ISD::FP_EXTEND;
  case PtrToInt:
  case IntToPtr:
  case BitCast:
    return ISD::BITCAST;
  case AddrSpaceCast:
    return ISD::ADDRSPACECAST;

  // Comparisons and selection.
  case ICmp:
  case FCmp:
    return ISD::SETCC;
  case Select:
    return ISD::SELECT;

  // Vector element and aggregate manipulation.
  case ExtractElement:
    return ISD::EXTRACT_VECTOR_ELT;
  case InsertElement:
    return ISD::INSERT_VECTOR_ELT;
  case ShuffleVector:
    return ISD::VECTOR_SHUFFLE;
  case ExtractValue:
  case InsertValue:
    return ISD::MERGE_VALUES;
  }

  llvm_unreachable("Unknown instruction type encountered!");
}
std::pair<int, MVT>
TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
LLVMContext &C = Ty>getContext();
EVT MTy = getValueType(DL, Ty);
int Cost = 1;
// We keep legalizing the type until we find a legal kind. We assume that
// the only operation that costs anything is the split. After splitting
// we need to handle two types.
while (true) {
LegalizeKind LK = getTypeConversion(C, MTy);
if (LK.first == TypeLegal)
return std::make_pair(Cost, MTy.getSimpleVT());
if (LK.first == TypeSplitVector  LK.first == TypeExpandInteger)
Cost *= 2;
// Do not loop with f128 type.
if (MTy == LK.second)
return std::make_pair(Cost, MTy.getSimpleVT());
// Keep legalizing the type.
MTy = LK.second;
}
}
/// Return the module-level variable "__safestack_unsafe_stack_ptr", declaring
/// it in the module if it is not present yet.
///
/// \param IRB    builder whose insertion block identifies the module.
/// \param UseTLS whether the variable must be thread-local. A newly created
///               variable uses the initial-exec TLS model when this is true.
///
/// If the variable already exists with a different value type or a different
/// thread-local setting, a fatal error is reported.
Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
                                                              bool UseTLS) const {
  // compiler-rt provides a variable with a magic name.  Targets that do not
  // link with compiler-rt may also provide such a variable.
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
  auto UnsafeStackPtr =
      dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));

  Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());

  if (!UnsafeStackPtr) {
    auto TLSModel = UseTLS ?
        GlobalValue::InitialExecTLSModel :
        GlobalValue::NotThreadLocal;
    // The global variable is not defined yet, define it ourselves.
    // We use the initial-exec TLS model because we do not support the
    // variable living anywhere other than in the main executable.
    UnsafeStackPtr = new GlobalVariable(
        *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
        UnsafeStackPtrVar, nullptr, TLSModel);
  } else {
    // The variable exists, check its type and attributes.
    if (UnsafeStackPtr->getValueType() != StackPtrTy)
      report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
    if (UseTLS != UnsafeStackPtr->isThreadLocal())
      report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
                         (UseTLS ? "" : "not ") + "be threadlocal");
  }
  return UnsafeStackPtr;
}
/// Return a value giving the location of the unsafe stack pointer.
///
/// On non-Android targets this is the default thread-local variable. On
/// Android a call to "__safestack_pointer_address" is emitted through \p IRB
/// and the call's result is returned.
Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
  if (!TM.getTargetTriple().isAndroid())
    return getDefaultSafeStackPointerLocation(IRB, true);

  // Android provides a libc function to retrieve the address of the current
  // thread's unsafe stack pointer.
  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
  Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
  FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address",
                                             StackPtrTy->getPointerTo(0));
  return IRB.CreateCall(Fn);
}
//======//
// Loop Strength Reduction hooks
//======//
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
///
/// This default accepts only: an immediate, reg+imm, reg+reg, and 2*reg
/// (treated as reg+reg). Global bases are always rejected. DL, Ty, AS and I
/// are not consulted here.
bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
                                               unsigned AS, Instruction *I) const {
  // The default implementation of this implements a conservative RISCy, r+r and
  // r+i addr mode.

  // Allows a sign-extended 16-bit immediate field.
  // As written, offsets in the open interval (-(1 << 16), (1 << 16) - 1) pass.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Only support r+r,
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default: // Don't allow n * r
    return false;
  }

  return true;
}
//======//
// Stack Protector
//======//
// For OpenBSD return its special guard variable ("__guard_local", obtained or
// inserted in the module that contains IRB's insertion block). Otherwise
// return nullptr, so that SelectionDAG handles SSP.
Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
  if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
    Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
    PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
    return M.getOrInsertGlobal("__guard_local", PtrTy);
  }
  return nullptr;
}
// Declare the "standard" __stack_chk_guard variable in the module unless a
// value with that name is already present. Only this guard is supported.
// TODO: add LOAD_STACK_GUARD support.
void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
  // Nothing to do when the module already names the guard.
  if (M.getNamedValue("__stack_chk_guard"))
    return;

  // The module takes ownership of the new external, non-constant i8* global.
  new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()),
                     /*isConstant=*/false, GlobalVariable::ExternalLinkage,
                     /*Initializer=*/nullptr, "__stack_chk_guard");
}
// Look up the "standard" __stack_chk_guard value in the module; the result is
// whatever the module's name lookup yields (null if it has no such value).
// TODO: add LOAD_STACK_GUARD support.
Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
  Value *Guard = M.getNamedValue("__stack_chk_guard");
  return Guard;
}
// Base implementation: always returns nullptr, i.e. no stack guard check
// function is supplied here. The module argument is not used.
Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
return nullptr;
}
// Return the current value of MinimumJumpTableEntries (declared outside this
// excerpt).
unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
return MinimumJumpTableEntries;
}
// Overwrite MinimumJumpTableEntries (declared outside this excerpt) with Val.
void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
MinimumJumpTableEntries = Val;
}
// Return the jump table density setting: the size-optimizing value when
// OptForSize is true, the regular value otherwise. Both settings are declared
// outside this excerpt.
unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const {
  if (OptForSize)
    return OptsizeJumpTableDensity;
  return JumpTableDensity;
}
// Return the current value of MaximumJumpTableSize (declared outside this
// excerpt).
unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
return MaximumJumpTableSize;
}
// Overwrite MaximumJumpTableSize (declared outside this excerpt) with Val.
void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
MaximumJumpTableSize = Val;
}
//======//
// Reciprocal Estimates
//======//
/// Get the reciprocal estimate attribute string for a function that will
/// override the target defaults.
///
/// Reads the "reciprocal-estimates" function attribute of \p MF's function and
/// returns its string value.
static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return F.getFnAttribute("reciprocal-estimates").getValueAsString();
}
/// Construct a string for the given reciprocal operation of the given type.
/// This string should match the corresponding option to the frontend's
/// "-mrecip" flag assuming those strings have been passed through in an
/// attribute string. For example, "vec-divf" for a division of a vXf32.
///
/// The name is built as: optional "vec-" prefix for vector types, then "sqrt"
/// or "div", then a size suffix ("d" for f64 elements, "f" otherwise).
static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
  std::string Name = VT.isVector() ? "vec-" : "";

  Name += IsSqrt ? "sqrt" : "div";

  // TODO: Handle "half" or other float types?
  if (VT.getScalarType() == MVT::f64) {
    Name += "d";
  } else {
    assert(VT.getScalarType() == MVT::f32 &&
           "Unexpected FP type for reciprocal estimate");
    Name += "f";
  }

  return Name;
}
/// Return the character position and value (a single numeric character) of a
/// customized refinement operation in the input string if it exists. Return
/// false if there is no customized refinement step count.
///
/// \param In       one entry of the attribute string, e.g. "name:2".
/// \param Position set to the index of ':' in \p In, or StringRef::npos.
/// \param Value    set to the digit's numeric value only when true is
///                 returned.
///
/// If a ':' is present but is not followed by exactly one decimal digit, a
/// fatal error is reported.
static bool parseRefinementStep(StringRef In, size_t &Position,
                                uint8_t &Value) {
  const char RefStepToken = ':';
  Position = In.find(RefStepToken);
  if (Position == StringRef::npos)
    return false;

  StringRef RefStepString = In.substr(Position + 1);
  // Allow exactly one numeric character for the additional refinement
  // step parameter.
  if (RefStepString.size() == 1) {
    char RefStepChar = RefStepString[0];
    if (RefStepChar >= '0' && RefStepChar <= '9') {
      // Convert the ASCII digit to its numeric value.
      Value = RefStepChar - '0';
      return true;
    }
  }
  report_fatal_error("Invalid refinement step for recip.");
}
/// For the input attribute string, return one of the ReciprocalEstimate enum
/// status values (enabled, disabled, or not specified) for this operation on
/// the specified data type.
///
/// \param IsSqrt   true for the square-root estimate, false for division.
/// \param VT       value type of the operation; selects the name to look for.
/// \param Override comma-separated attribute string. Each entry may carry a
///                 leading '!' (disable) and a trailing ":N" refinement step
///                 count, both of which are ignored for name matching.
static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
  if (Override.empty())
    return TargetLoweringBase::ReciprocalEstimate::Unspecified;

  SmallVector<StringRef, 4> OverrideVector;
  Override.split(OverrideVector, ',');
  unsigned NumArgs = OverrideVector.size();

  // Check if "all", "none", or "default" was specified.
  if (NumArgs == 1) {
    // Look for an optional setting of the number of refinement steps needed
    // for this type of reciprocal operation.
    size_t RefPos;
    uint8_t RefSteps;
    if (parseRefinementStep(Override, RefPos, RefSteps)) {
      // Split the string for further processing.
      Override = Override.substr(0, RefPos);
    }

    // All reciprocal types are enabled.
    if (Override == "all")
      return TargetLoweringBase::ReciprocalEstimate::Enabled;

    // All reciprocal types are disabled.
    if (Override == "none")
      return TargetLoweringBase::ReciprocalEstimate::Disabled;

    // Target defaults for enablement are used.
    if (Override == "default")
      return TargetLoweringBase::ReciprocalEstimate::Unspecified;
  }

  // The attribute string may omit the size suffix ('f'/'d').
  std::string VTName = getReciprocalOpName(IsSqrt, VT);
  std::string VTNameNoSize = VTName;
  VTNameNoSize.pop_back();
  static const char DisabledPrefix = '!';

  for (StringRef RecipType : OverrideVector) {
    size_t RefPos;
    uint8_t RefSteps;
    if (parseRefinementStep(RecipType, RefPos, RefSteps))
      RecipType = RecipType.substr(0, RefPos);

    // An empty entry (for example from "a,,b" or ":3") names no operation and
    // can never match; skip it rather than indexing past its end below.
    if (RecipType.empty())
      continue;

    // Ignore the disablement token for string matching.
    bool IsDisabled = RecipType[0] == DisabledPrefix;
    if (IsDisabled)
      RecipType = RecipType.substr(1);

    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
      return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
                        : TargetLoweringBase::ReciprocalEstimate::Enabled;
  }

  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
}
/// For the input attribute string, return the customized refinement step count
/// for this operation on the specified data type. If the step count does not
/// exist, return the ReciprocalEstimate enum value for unspecified.
///
/// \param IsSqrt   true for the square-root estimate, false for division.
/// \param VT       value type of the operation; selects the name to look for.
/// \param Override comma-separated attribute string whose entries may end in
///                 ":N", where N is a single decimal digit.
static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
  if (Override.empty())
    return TargetLoweringBase::ReciprocalEstimate::Unspecified;

  SmallVector<StringRef, 4> OverrideVector;
  Override.split(OverrideVector, ',');
  unsigned NumArgs = OverrideVector.size();

  // Check if "all", "default", or "none" was specified.
  if (NumArgs == 1) {
    // Look for an optional setting of the number of refinement steps needed
    // for this type of reciprocal operation.
    size_t RefPos;
    uint8_t RefSteps;
    if (!parseRefinementStep(Override, RefPos, RefSteps))
      return TargetLoweringBase::ReciprocalEstimate::Unspecified;

    // Split the string for further processing.
    Override = Override.substr(0, RefPos);
    assert(Override != "none" &&
           "Disabled reciprocals, but specifed refinement steps?");

    // If this is a general override, return the specified number of steps.
    if (Override == "all" || Override == "default")
      return RefSteps;
  }

  // The attribute string may omit the size suffix ('f'/'d').
  std::string VTName = getReciprocalOpName(IsSqrt, VT);
  std::string VTNameNoSize = VTName;
  VTNameNoSize.pop_back();

  for (StringRef RecipType : OverrideVector) {
    size_t RefPos;
    uint8_t RefSteps;
    // Entries without a ":N" suffix carry no step count.
    if (!parseRefinementStep(RecipType, RefPos, RefSteps))
      continue;

    RecipType = RecipType.substr(0, RefPos);
    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
      return RefSteps;
  }

  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
}
/// Report the enablement status of the square-root estimate for \p VT, based
/// on the override string obtained for \p MF.
int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
                                                    MachineFunction &MF) const {
  const StringRef Override = getRecipEstimateForFunc(MF);
  return getOpEnabled(/*IsSqrt=*/true, VT, Override);
}
/// Report the enablement status of the division estimate for \p VT, based on
/// the override string obtained for \p MF.
int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
                                                   MachineFunction &MF) const {
  const StringRef Override = getRecipEstimateForFunc(MF);
  return getOpEnabled(/*IsSqrt=*/false, VT, Override);
}
/// Report the refinement step count requested for the square-root estimate of
/// \p VT, based on the override string obtained for \p MF.
int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
                                               MachineFunction &MF) const {
  const StringRef Override = getRecipEstimateForFunc(MF);
  return getOpRefinementSteps(/*IsSqrt=*/true, VT, Override);
}
/// Report the refinement step count requested for the division estimate of
/// \p VT, based on the override string obtained for \p MF.
int TargetLoweringBase::getDivRefinementSteps(EVT VT,
                                              MachineFunction &MF) const {
  const StringRef Override = getRecipEstimateForFunc(MF);
  return getOpRefinementSteps(/*IsSqrt=*/false, VT, Override);
}
/// Final lowering hook: freeze the reserved registers recorded in the
/// function's register info.
void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const {
  auto &RegInfo = MF.getRegInfo();
  RegInfo.freezeReservedRegs(MF);
}
diff git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 84b4e97420c7..77311c5b918b 100644
 a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ 1,12359 +1,12359 @@
//=== AArch64ISelLowering.cpp  AArch64 DAG Lowering Implementation ===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDXLicenseIdentifier: Apache2.0 WITH LLVMexception
//
//======//
//
// This file implements the AArch64TargetLowering class.
//
//======//
#include "AArch64ExpandImm.h"
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"

// Counters reported under this file's debug type.
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");

// Hidden option, off by default: allow formation of SLI/SRI instructions.
static cl::opt<bool>
EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
                           cl::desc("Allow AArch64 SLI/SRI formation"),
                           cl::init(false));

// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
    "aarch64-elf-ldtls-generation", cl::Hidden,
    cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
    cl::init(false));

// Hidden option, on by default: optimize logical-immediate instructions.
static cl::opt<bool>
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
                         cl::desc("Enable AArch64 logical imm instruction "
                                  "optimization"),
                         cl::init(true));

/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
// When comparing vectors the result sets the different elements in the
// vector to allone or allzero.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
if (Subtarget>hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
if (Subtarget>hasNEON()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
}
if (Subtarget>hasSVE()) {
// Add legal sve predicate types
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
// Add legal sve data types
addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv1f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
}
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget>getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f80, Expand);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Custom lowering hooks are needed for XOR
// to fold it into CSINC/CSINV.
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FADD, MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, Custom);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FMUL, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, Custom);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// Lowering for many of the conversions is actually specified by the nonf128
// type. The LowerXXX function will be trivial when f128 isn't involved.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Variablesized objects.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget>isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
// BlockAddress
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
// Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
// AArch64 lacks both leftrotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
 for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// AArch64 doesn't have {US}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
 for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Custom lower Add/Sub/Mul with overflow.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
if (Subtarget>hasFullFP16())
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::f16, Promote);
setOperationAction(ISD::FREM, MVT::v4f16, Expand);
setOperationAction(ISD::FREM, MVT::v8f16, Expand);
setOperationAction(ISD::FPOW, MVT::f16, Promote);
setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
setOperationAction(ISD::FPOWI, MVT::f16, Promote);
setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
setOperationAction(ISD::FCOS, MVT::f16, Promote);
setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FSIN, MVT::f16, Promote);
setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP, MVT::f16, Promote);
setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
setOperationAction(ISD::FEXP2, MVT::f16, Promote);
setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG, MVT::f16, Promote);
setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG2, MVT::f16, Promote);
setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
setOperationAction(ISD::FLOG10, MVT::f16, Promote);
setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
if (!Subtarget>hasFullFP16()) {
setOperationAction(ISD::SELECT, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
setOperationAction(ISD::SETCC, MVT::f16, Promote);
setOperationAction(ISD::BR_CC, MVT::f16, Promote);
setOperationAction(ISD::FADD, MVT::f16, Promote);
setOperationAction(ISD::FSUB, MVT::f16, Promote);
setOperationAction(ISD::FMUL, MVT::f16, Promote);
setOperationAction(ISD::FDIV, MVT::f16, Promote);
setOperationAction(ISD::FMA, MVT::f16, Promote);
setOperationAction(ISD::FNEG, MVT::f16, Promote);
setOperationAction(ISD::FABS, MVT::f16, Promote);
setOperationAction(ISD::FCEIL, MVT::f16, Promote);
setOperationAction(ISD::FSQRT, MVT::f16, Promote);
setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
setOperationAction(ISD::FRINT, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
setOperationAction(ISD::FABS, MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
setOperationAction(ISD::FMA, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
}
// AArch64 has implementations of a lot of roundinglike FP operations.
for (MVT Ty : {MVT::f32, MVT::f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
setOperationAction(ISD::FMINNUM, Ty, Legal);
setOperationAction(ISD::FMAXNUM, Ty, Legal);
setOperationAction(ISD::FMINIMUM, Ty, Legal);
setOperationAction(ISD::FMAXIMUM, Ty, Legal);
setOperationAction(ISD::LROUND, Ty, Legal);
setOperationAction(ISD::LLROUND, Ty, Legal);
setOperationAction(ISD::LRINT, Ty, Legal);
setOperationAction(ISD::LLRINT, Ty, Legal);
}
if (Subtarget>hasFullFP16()) {
setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
setOperationAction(ISD::FCEIL, MVT::f16, Legal);
setOperationAction(ISD::FRINT, MVT::f16, Legal);
setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
setOperationAction(ISD::FROUND, MVT::f16, Legal);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget>hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
} else {
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
// Make floatingpoint constants legal for the large code model, so they don't
// become loads from the constant pool.
if (Subtarget>isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
}
// AArch64 does not have floatingpoint extending loads, i1 signextending
// load, floatingpoint truncating stores, or v2i32>v2i16 truncating store.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f80, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
setIndexedStoreAction(im, MVT::f16, Legal);
}
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
if (Subtarget>isTargetWindows())
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
// Vector add and sub nodes may conceal a highhalf opportunity.
// Also, try to fold ADD into CSINC/CSINV..
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
setTargetDAGCombine(ISD::STORE);
if (Subtarget>supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::GlobalAddress);
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset = Subtarget>requiresStrictAlign()
? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemcpy = Subtarget>requiresStrictAlign()
? MaxStoresPerMemcpyOptSize : 16;
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp = Subtarget>requiresStrictAlign()
? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
EnableExtLdPromotion = true;
// Set required alignment.
setMinFunctionAlignment(llvm::Align(4));
// Set preferred alignments.
setPrefLoopAlignment(llvm::Align(1ULL << STI.getPrefLoopLogAlignment()));
setPrefFunctionAlignment(
llvm::Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if specified by
// the sub target, but not at the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget>hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
setOperationAction(ISD::FABS, MVT::v1f64, Expand);
setOperationAction(ISD::FADD, MVT::v1f64, Expand);
setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
setOperationAction(ISD::FMA, MVT::v1f64, Expand);
setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
setOperationAction(ISD::FREM, MVT::v1f64, Expand);
setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// AArch64 doesn't have a direct vector >f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
// i8 vector elements also need promotion to i32 for v8i8
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
// Similarly, there is no direct i32 > f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
// Or, direct i32 > f16 vector conversion. Set it so custom, so the
// conversion happens in two steps: v4i32 > v4f32 > v4f16
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
if (Subtarget>hasFullFP16()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
} else {
// when AArch64 doesn't have fullfp16 support, promote the input
// to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quadvector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Vector reductions
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
}
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
 for (MVT VT : MVT::vector_valuetypes()) {
+ for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8  VT == MVT::v8i16  VT == MVT::v4i32) {
setOperationAction(ISD::MULHS, VT, Legal);
setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
}
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
 for (MVT InnerVT : MVT::vector_valuetypes()) {
+ for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// AArch64 has implementations of a lot of roundinglike FP operations.
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
}
if (Subtarget>hasFullFP16()) {
for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
setOperationAction(ISD::FFLOOR, Ty, Legal);
setOperationAction(ISD::FNEARBYINT, Ty, Legal);
setOperationAction(ISD::FCEIL, Ty, Legal);
setOperationAction(ISD::FRINT, Ty, Legal);
setOperationAction(ISD::FTRUNC, Ty, Legal);
setOperationAction(ISD::FROUND, Ty, Legal);
}
}
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
}
PredictableSelectIsExpensive = Subtarget>predictableSelectIsExpensive();
}
/// Install the operation actions that are common to every NEON vector type.
///
/// \param VT                 The vector type being configured. Must be a
///                           vector type (asserted).
/// \param PromotedBitwiseVT  Not referenced by this body; retained so the
///                           signature callers use is unchanged.
void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
  assert(VT.isVector() && "VT should be a vector type");

  // Floating-point vectors are loaded and stored as the vector type obtained
  // by switching the element type to integer.
  if (VT.isFloatingPoint()) {
    MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
    setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
    setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
  }

  // Mark vector float intrinsics as expand.
  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
    for (unsigned Opcode : {ISD::FSIN, ISD::FCOS, ISD::FPOW, ISD::FLOG,
                            ISD::FLOG2, ISD::FLOG10, ISD::FEXP, ISD::FEXP2})
      setOperationAction(Opcode, VT, Expand);

    // But we do support custom-lowering for FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, VT, Custom);
  }

  // Element access, vector construction, shifts, OR and compares are all
  // custom-lowered.
  for (unsigned Opcode :
       {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT, ISD::BUILD_VECTOR,
        ISD::VECTOR_SHUFFLE, ISD::EXTRACT_SUBVECTOR, ISD::SRA, ISD::SRL,
        ISD::SHL, ISD::OR, ISD::SETCC})
    setOperationAction(Opcode, VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);

  setOperationAction(ISD::SELECT, VT, Expand);
  setOperationAction(ISD::SELECT_CC, VT, Expand);
  setOperationAction(ISD::VSELECT, VT, Expand);

  // Any-extending loads with VT as the memory type are expanded, whatever the
  // result type is.
  for (MVT InnerVT : MVT::all_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);

  // CNT supports only B element sizes, then use UADDLP to widen.
  if (VT != MVT::v8i8 && VT != MVT::v16i8)
    setOperationAction(ISD::CTPOP, VT, Custom);

  // Division and remainder are expanded for every NEON type.
  for (unsigned Opcode :
       {ISD::UDIV, ISD::SDIV, ISD::UREM, ISD::SREM, ISD::FREM})
    setOperationAction(Opcode, VT, Expand);

  setOperationAction(ISD::FP_TO_SINT, VT, Custom);
  setOperationAction(ISD::FP_TO_UINT, VT, Custom);

  if (!VT.isFloatingPoint())
    setOperationAction(ISD::ABS, VT, Legal);

  // [SU][MIN|MAX] are available for all NEON types apart from i64.
  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
      setOperationAction(Opcode, VT, Legal);

  // F[MIN|MAX][NUM|NAN] are available for all FP NEON types; f16 element
  // vectors additionally require the full FP16 feature.
  if (VT.isFloatingPoint() &&
      (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
    for (unsigned Opcode :
         {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
      setOperationAction(Opcode, VT, Legal);

  // Indexed loads/stores of the vector type are only marked legal on
  // little-endian subtargets.
  if (Subtarget->isLittleEndian()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, VT, Legal);
      setIndexedStoreAction(im, VT, Legal);
    }
  }
}
/// Place \p VT in the FPR64 register class and apply the common NEON
/// configuration, passing v2i32 as the promoted bitwise type.
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
  const MVT BitwiseVT = MVT::v2i32;
  addRegisterClass(VT, &AArch64::FPR64RegClass);
  addTypeForNEON(VT, BitwiseVT);
}
/// Place \p VT in the FPR128 register class and apply the common NEON
/// configuration, passing v4i32 as the promoted bitwise type.
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
  const MVT BitwiseVT = MVT::v4i32;
  addRegisterClass(VT, &AArch64::FPR128RegClass);
  addTypeForNEON(VT, BitwiseVT);
}
/// Return the type produced by a SETCC whose operands have type \p VT.
///
/// Vector comparisons yield \p VT with its element type changed to integer;
/// every non-vector comparison yields i32. The data layout and context
/// arguments are not consulted.
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                              EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();
  // Scalar case.
  return MVT::i32;
}
/// Try to rewrite the immediate of a logical operation so that it becomes
/// encodable as a logical (bitmask) immediate, exploiting the freedom to
/// choose any value for bits that are not demanded.
///
/// \param Op        The logical node whose second operand is the constant
///                  \p Imm; operand 0 is reused for the replacement node.
/// \param Size      Width of the operation in bits (the caller in this file
///                  asserts 32 or 64).
/// \param Imm       The current immediate value.
/// \param Demanded  Bits of the result that are actually used.
/// \param TLO       Receives the replacement node via CombineTo.
/// \param NewOpc    Machine opcode used when a target node must be emitted.
/// \returns false if the immediate is left alone (already all-zeros, all-ones
///          or a logical immediate, or no suitable value exists); otherwise
///          the result of TLO.CombineTo.
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
                               const APInt &Demanded,
                               TargetLowering::TargetLoweringOpt &TLO,
                               unsigned NewOpc) {
  uint64_t OldImm = Imm, NewImm;
  // All-ones in the low Size bits.
  uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;

  // Return if the immediate is already all zeros, all ones, a bimm32 or a
  // bimm64.
  if (Imm == 0 || Imm == Mask ||
      AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
    return false;

  unsigned EltSize = Size;
  uint64_t DemandedBits = Demanded.getZExtValue();

  // Clear bits that are not demanded.
  Imm &= DemandedBits;

  while (true) {
    // The goal here is to set the non-demanded bits in a way that minimizes
    // the number of switching between 0 and 1. In order to achieve this goal,
    // we set the non-demanded bits to the value of the preceding demanded bits.
    // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
    // non-demanded bit), we copy bit0 (1) to the least significant 'x',
    // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
    // The final result is 0b11000011.
    uint64_t NonDemandedBits = ~DemandedBits;
    uint64_t InvertedImm = ~Imm & DemandedBits;
    // Rotate left by one within the element so each demanded bit lines up
    // with the non-demanded bit that follows it.
    uint64_t RotatedImm =
        ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
        NonDemandedBits;
    uint64_t Sum = RotatedImm + NonDemandedBits;
    bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
    uint64_t Ones = (Sum + Carry) & NonDemandedBits;
    NewImm = (Imm | Ones) & Mask;

    // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
    // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
    // we halve the element size and continue the search.
    if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
      break;

    // We cannot shrink the element size any further if it is 2-bits.
    if (EltSize == 2)
      return false;

    EltSize /= 2;
    Mask >>= EltSize;
    uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;

    // Return if there is mismatch in any of the demanded bits of Imm and Hi.
    if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
      return false;

    // Merge the upper and lower halves of Imm and DemandedBits.
    Imm |= Hi;
    DemandedBits |= DemandedBitsHi;
  }

  ++NumOptimizedImms;

  // Replicate the element across the register width.
  while (EltSize < Size) {
    NewImm |= NewImm << EltSize;
    EltSize *= 2;
  }

  // OldImm is only read by the assertions below.
  (void)OldImm;
  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
         "demanded bits should never be altered");
  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");

  // Create the new constant immediate node.
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue New;

  if (NewImm == 0 || NewImm == OrigMask) {
    // If the new constant immediate is all-zeros or all-ones, let the target
    // independent DAG combine optimize this node.
    New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
                          TLO.DAG.getConstant(NewImm, DL, VT));
  } else {
    // Otherwise, create a machine node so that target independent DAG combine
    // doesn't undo this optimization.
    uint64_t Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
    SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
    New = SDValue(
        TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
  }

  return TLO.CombineTo(Op, New);
}
/// Target hook: given that only the bits in \p Demanded of \p Op are used,
/// try to replace the constant operand of a scalar AND/OR/XOR with one that
/// can be encoded as a logical immediate.
///
/// \returns false when the optimization does not apply (operations not yet
///          legalized, option disabled, vector type, all bits demanded,
///          unsupported opcode, or non-constant operand 1); otherwise the
///          result of optimizeLogicalImm.
bool AArch64TargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
  // Delay this optimization to as late as possible.
  if (!TLO.LegalOps)
    return false;

  if (!EnableOptimizeLogicalImm)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector())
    return false;

  unsigned Size = VT.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "i32 or i64 is expected after legalization.");

  // Exit early if we demand all bits.
  if (Demanded.countPopulation() == Size)
    return false;

  // Pick the immediate-form machine opcode matching the operation and width.
  unsigned NewOpc;
  switch (Op.getOpcode()) {
  default:
    return false;
  case ISD::AND:
    NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
    break;
  case ISD::OR:
    NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
    break;
  case ISD::XOR:
    NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
    break;
  }

  // Only a constant second operand can be reshaped.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;
  uint64_t Imm = C->getZExtValue();
  return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
}
/// computeKnownBitsForTargetNode  Determine which of the bits specified in
/// Mask are known to be either zero or one and return them Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known,
const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case AArch64ISD::CSEL: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op>getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op>getOperand(1), Depth + 1);
Known.Zero &= Known2.Zero;
Known.One &= Known2.One;
break;
}
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget>isTargetILP32())
break;
// In ILP32 mode all valid pointers are in the low 4GB of the addressspace.
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op>getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN>getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
unsigned BitWidth = Known.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)>getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth  MemBits);
return;
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))>getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
// will zero extend the result, so we can mark as known zero all the
// bits larger than the element datatype. 32bit or larget doesn't need
// this as those are legal types and will be handled by isel directly.
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8  VT == MVT::v16i8) {
assert(BitWidth >= 8 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth  8);
Known.Zero = Mask;
} else if (VT == MVT::v4i16  VT == MVT::v8i16) {
assert(BitWidth >= 16 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth  16);
Known.Zero = Mask;
}
break;
} break;
}
}
}
}
/// Report the type used for scalar shift amounts. The answer is always i64;
/// neither the data layout nor the type being shifted is consulted.
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                                  EVT) const {
  const MVT ShiftAmountTy = MVT::i64;
  return ShiftAmountTy;
}
/// Report whether a misaligned access of type \p VT is permitted.
///
/// \param VT     The value type being accessed.
/// \param Align  The alignment of the access; values of 1 or 2 are treated
///               as an explicit request for fast unaligned access.
/// \param Fast   Optional out-parameter; when non-null it is set to whether
///               the misaligned access is also considered fast.
/// \returns false if the subtarget requires strict alignment, true otherwise.
///          \p AddrSpace and \p Flags are not consulted by this body.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (Subtarget->requiresStrictAlign())
    return false;

  if (Fast) {
    // Some CPUs are fine with unaligned stores except for 128-bit ones.
    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
            // See comments in performSTORECombine() for more details about
            // these conditions.

            // Code that uses clang vector extensions can mark that it
            // wants unaligned accesses to be treated as fast by
            // underspecifying alignment to be 1 or 2.
            Align <= 2 ||

            // Disregard v2i64. Memcpy lowering produces those and splitting
            // them regresses performance on micro-benchmarks and olden/bh.
            VT == MVT::v2i64;
  }
  return true;
}
/// LLT overload of allowsMisalignedMemoryAccesses: reports whether a
/// misaligned access of low-level type \p Ty is permitted.
///
/// \param Ty     The low-level type being accessed.
/// \param Align  The alignment of the access; values of 1 or 2 are treated
///               as an explicit request for fast unaligned access.
/// \param Fast   Optional out-parameter; when non-null it is set to whether
///               the misaligned access is also considered fast.
/// \returns false if the subtarget requires strict alignment, true otherwise.
///          \p AddrSpace and \p Flags are not consulted by this body.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
    LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (Subtarget->requiresStrictAlign())
    return false;

  if (Fast) {
    // Some CPUs are fine with unaligned stores except for 128-bit ones.
    *Fast = !Subtarget->isMisaligned128StoreSlow() ||
            Ty.getSizeInBytes() != 16 ||
            // See comments in performSTORECombine() for more details about
            // these conditions.

            // Code that uses clang vector extensions can mark that it
            // wants unaligned accesses to be treated as fast by
            // underspecifying alignment to be 1 or 2.
            Align <= 2 ||

            // Disregard v2i64. Memcpy lowering produces those and splitting
            // them regresses performance on micro-benchmarks and olden/bh.
            Ty == LLT::vector(2, 64);
  }
  return true;
}
/// Create the FastISel instance for this target by forwarding both arguments
/// unchanged to AArch64::createFastISel and returning its result.
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                      const TargetLibraryInfo *libInfo) const {
  FastISel *Selector = AArch64::createFastISel(funcInfo, libInfo);
  return Selector;
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((AArch64ISD::NodeType)Opcode) {
case AArch64ISD::FIRST_NUMBER: break;
case AArch64ISD::CALL: return "AArch64ISD::CALL";
case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
case AArch64ISD::ADR: return "AArch64ISD::ADR";
case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
case AArch64ISD::ADC: return "AArch64ISD::ADC";
case AArch64ISD::SBC: return "AArch64ISD::SBC";
case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
case AArch64ISD::DUP: return "AArch64ISD::DUP";
case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
case AArch64ISD::BICi: return "AArch64ISD::BICi";
case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
case AArch64ISD::BSL: return "AArch64ISD::BSL";
case AArch64ISD::NEG: return "AArch64ISD::NEG";
case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
case AArch64ISD::REV16: return "AArch64ISD::REV16";
case AArch64ISD::REV32: return "AArch64ISD::REV32";
case AArch64ISD::REV64: return "AArch64ISD::REV64";
case AArch64ISD::EXT: return "AArch64ISD::EXT";
case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
case AArch64ISD::NOT: return "AArch64ISD::NOT";
case AArch64ISD::BIT: return "AArch64ISD::BIT";
case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
case AArch64ISD::STG: return "AArch64ISD::STG";
case AArch64ISD::STZG: return "AArch64ISD::STZG";
case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
}
return nullptr;
}
/// Expand the F128CSEL pseudo-instruction \p MI, located in \p MBB, into
/// explicit control flow plus a PHI.
///
/// Operands read from \p MI: 0 = destination register, 1 = value if true,
/// 2 = value if false, 3 = condition code immediate, 4 = the operand whose
/// kill flag says whether NZCV dies at this instruction.
///
/// \returns the newly created end block, which receives everything that
///          followed \p MI in \p MBB; \p MI itself is erased.
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  // We materialise the F128CSEL pseudo-instruction as some control flow and a
  // phi node:

  // OrigBB:
  //     [... previous instrs leading to comparison ...]
  //     b.ne TrueBB
  //     b EndBB
  // TrueBB:
  //     ; Fallthrough
  // EndBB:
  //     Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]

  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  DebugLoc DL = MI.getDebugLoc();
  // Insertion point for the new blocks: immediately after MBB.
  MachineFunction::iterator It = ++MBB->getIterator();

  Register DestReg = MI.getOperand(0).getReg();
  Register IfTrueReg = MI.getOperand(1).getReg();
  Register IfFalseReg = MI.getOperand(2).getReg();
  unsigned CondCode = MI.getOperand(3).getImm();
  bool NZCVKilled = MI.getOperand(4).isKill();

  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, TrueBB);
  MF->insert(It, EndBB);

  // Transfer rest of current basic-block to EndBB
  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
                MBB->end());
  EndBB->transferSuccessorsAndUpdatePHIs(MBB);

  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
  MBB->addSuccessor(TrueBB);
  MBB->addSuccessor(EndBB);

  // TrueBB falls through to the end.
  TrueBB->addSuccessor(EndBB);

  // If NZCV is still live after the pseudo, it must be live into both new
  // blocks.
  if (!NZCVKilled) {
    TrueBB->addLiveIn(AArch64::NZCV);
    EndBB->addLiveIn(AArch64::NZCV);
  }

  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
      .addReg(IfTrueReg)
      .addMBB(TrueBB)
      .addReg(IfFalseReg)
      .addMBB(MBB);

  MI.eraseFromParent();
  return EndBB;
}
/// Custom-insertion handler for CATCHRET. The instruction is left in place
/// and \p BB is returned unchanged; the only work done here is asserting that
/// the enclosing function's personality is not an asynchronous-EH one.
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
             BB->getParent()->getFunction().getPersonalityFn())) &&
         "SEH does not use catchret!");
  return BB;
}
/// Custom-insertion handler for CATCHPAD: the instruction is simply erased
/// from its block and \p BB is returned unchanged.
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
MachineInstr &MI, MachineBasicBlock *BB) const {
// Nothing is emitted in its place.
MI.eraseFromParent();
return BB;
}
/// Dispatch an instruction that requested custom insertion to the routine
/// that expands it. Reaching the end of the function means the opcode has no
/// handler here; debug builds dump the instruction before aborting.
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  const unsigned Opc = MI.getOpcode();

  if (Opc == AArch64::F128CSEL)
    return EmitF128CSEL(MI, BB);

  // Stack maps and patch points share one expansion.
  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT)
    return emitPatchPoint(MI, BB);

  if (Opc == AArch64::CATCHRET)
    return EmitLoweredCatchRet(MI, BB);

  if (Opc == AArch64::CATCHPAD)
    return EmitLoweredCatchPad(MI, BB);

#ifndef NDEBUG
  MI.dump();
#endif
  llvm_unreachable("Unexpected instruction for custom inserter!");
}
//======//
// AArch64 Lowering private implementation.
//======//
//======//
// Lowering Code
//======//
/// Map an integer DAG condition code onto the AArch64 condition code that
/// tests the same relation. Only the ten codes listed below are accepted;
/// anything else is treated as unreachable.
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
  switch (CC) {
  // Equality.
  case ISD::SETEQ:  return AArch64CC::EQ;
  case ISD::SETNE:  return AArch64CC::NE;

  // Signed orderings.
  case ISD::SETLT:  return AArch64CC::LT;
  case ISD::SETLE:  return AArch64CC::LE;
  case ISD::SETGT:  return AArch64CC::GT;
  case ISD::SETGE:  return AArch64CC::GE;

  // Unsigned orderings.
  case ISD::SETULT: return AArch64CC::LO;
  case ISD::SETULE: return AArch64CC::LS;
  case ISD::SETUGT: return AArch64CC::HI;
  case ISD::SETUGE: return AArch64CC::HS;

  default:
    llvm_unreachable("Unknown condition code!");
  }
}
/// Translate a floating-point DAG condition code into AArch64 condition codes.
///
/// \p CondCode always receives a condition. \p CondCode2 is left as
/// AArch64CC::AL when one condition is enough; for SETONE and SETUEQ it
/// receives a second condition to be tested as well.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
                                  AArch64CC::CondCode &CondCode,
                                  AArch64CC::CondCode &CondCode2) {
  // Start from "no second condition"; the two-condition cases overwrite it.
  CondCode2 = AArch64CC::AL;

  switch (CC) {
  // Conditions that need a pair of tests.
  case ISD::SETONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case ISD::SETUEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;

  // Ordered / unordered tests.
  case ISD::SETO:
    CondCode = AArch64CC::VC;
    break;
  case ISD::SETUO:
    CondCode = AArch64CC::VS;
    break;

  // Equality and inequality.
  case ISD::SETOEQ:
  case ISD::SETEQ:
    CondCode = AArch64CC::EQ;
    break;
  case ISD::SETUNE:
  case ISD::SETNE:
    CondCode = AArch64CC::NE;
    break;

  // Greater-than family.
  case ISD::SETOGT:
  case ISD::SETGT:
    CondCode = AArch64CC::GT;
    break;
  case ISD::SETOGE:
  case ISD::SETGE:
    CondCode = AArch64CC::GE;
    break;
  case ISD::SETUGT:
    CondCode = AArch64CC::HI;
    break;
  case ISD::SETUGE:
    CondCode = AArch64CC::PL;
    break;

  // Less-than family.
  case ISD::SETOLT:
    CondCode = AArch64CC::MI;
    break;
  case ISD::SETOLE:
    CondCode = AArch64CC::LS;
    break;
  case ISD::SETULT:
  case ISD::SETLT:
    CondCode = AArch64CC::LT;
    break;
  case ISD::SETULE:
  case ISD::SETLE:
    CondCode = AArch64CC::LE;
    break;

  default:
    llvm_unreachable("Unknown FP condition!");
  }
}
/// Variant of changeFPCCToAArch64CC for callers that combine the two returned
/// conditions with AND rather than OR. Only SETONE and SETUEQ need a
/// different decomposition; every other code is forwarded to
/// changeFPCCToAArch64CC, which must then report a single condition.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
                                     AArch64CC::CondCode &CondCode,
                                     AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;

  if (CC == ISD::SETONE) {
    // (a one b)
    //   == ((a olt b) || (a ogt b))
    //   == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    return;
  }

  if (CC == ISD::SETUEQ) {
    // (a ueq b)
    //   == ((a uno b) || (a oeq b))
    //   == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    return;
  }

  // Everything else maps to one condition, so AND and OR forms coincide.
  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
  assert(CondCode2 == AArch64CC::AL);
}
/// Translate a floating-point DAG condition code into condition codes usable
/// by the vector compare instructions, which have no real NZCV register to
/// test and therefore support fewer predicates directly.
///
/// \p Invert is set when the caller has to invert the result of the
/// comparison(s) described by \p CondCode / \p CondCode2.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
                                        AArch64CC::CondCode &CondCode,
                                        AArch64CC::CondCode &CondCode2,
                                        bool &Invert) {
  switch (CC) {
  case ISD::SETO:
  case ISD::SETUO:
    // Both use the MI/GE pair; "unordered" is the inverted form of "ordered".
    Invert = (CC == ISD::SETUO);
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GE;
    return;

  case ISD::SETUEQ:
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    // All of the compare-mask comparisons are ordered, but we can switch
    // between the two by a double inversion. E.g. ULE == !OGT.
    Invert = true;
    changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
    return;

  default:
    // Mostly the scalar mappings work fine.
    Invert = false;
    changeFPCCToAArch64CC(CC, CondCode, CondCode2);
    return;
  }
}
/// Return true if \p C can be used as an arithmetic immediate: either it fits
/// in 12 bits, or its low 12 bits are zero and it fits in 24 bits. The
/// verdict is also printed to the debug stream.
static bool isLegalArithImmed(uint64_t C) {
  // Matches AArch64DAGToDAGISel::SelectArithImmed().
  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
  LLVM_DEBUG(dbgs() << "Is imm " << C
                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
  return IsLegal;
}
// Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on
// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
// everything is fine. If not then the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C and V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
//
// Returns true when Op has the form (sub 0, x) and CC is SETEQ or SETNE.
static bool isCMN(SDValue Op, ISD::CondCode CC) {
  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
         (CC == ISD::SETEQ || CC == ISD::SETNE);
}
/// Build the node that compares \p LHS with \p RHS and return its
/// flags-producing value.
///
/// Floating-point operands yield an FCMP; f16 operands are first extended to
/// f32 when the subtarget lacks full FP16 support. Integer operands yield
/// SUBS, or ADDS / ANDS when the operand shape and \p CC allow the
/// comparison to be expressed as a CMN or TST instead.
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG) {
  EVT VT = LHS.getValueType();

  if (VT.isFloatingPoint()) {
    assert(VT != MVT::f128);
    const auto &ST =
        static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
    const bool MustPromoteHalf = VT == MVT::f16 && !ST.hasFullFP16();
    if (MustPromoteHalf) {
      VT = MVT::f32;
      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
    }
    return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
  }

  unsigned Opcode;
  if (isCMN(RHS, CC)) {
    // (CMP op1, (sub 0, op2)) becomes a CMN of op1 and op2.
    Opcode = AArch64ISD::ADDS;
    RHS = RHS.getOperand(1);
  } else if (isCMN(LHS, CC)) {
    // Only EQ/NE reach this point, so the operands commute:
    // (CMP (sub 0, op1), op2) becomes a CMN as well.
    Opcode = AArch64ISD::ADDS;
    LHS = LHS.getOperand(1);
  } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
             !isUnsignedIntSetCC(CC)) {
    // (CMP (and X, Y), 0) can be done with a TST (a.k.a. ANDS), though the
    // flags are only guaranteed to be right for the signed comparisons.
    const SDValue AndNode = LHS;
    Opcode = AArch64ISD::ANDS;
    LHS = AndNode.getOperand(0);
    RHS = AndNode.getOperand(1);
  } else {
    // CMP is merely an alias of SUBS. Emitting SUBS lets the comparison CSE
    // with ordinary subtractions; a later phase can redirect the unused
    // result to WZR/XZR.
    Opcode = AArch64ISD::SUBS;
  }

  SDValue Compare =
      DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS);
  // Result 1 is the flags value.
  return Compare.getValue(1);
}
/// \defgroup AArch64CCMP CMP;CCMP matching
///
/// These functions deal with the formation of CMP;CCMP;... sequences.
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
/// a comparison. They set the NZCV flags to a predefined value if their
/// predicate is false. This allows to express arbitrary conjunctions, for
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
/// expressed as:
/// cmp A
/// ccmp B, inv(CB), CA
/// check for CB flags
///
/// This naturally lets us implement chains of AND operations with SETCC
/// operands. And we can even implement some other situations by transforming
/// them:
///   - We can implement (NEG SETCC) i.e. negating a single comparison by
///     negating the flags used in a CCMP/FCCMP operations.
///   - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
///     by negating the flags we test for afterwards. i.e.
///     NEG (CMP CCMP CCMP ...) can be implemented.
///   - Note that we can only ever negate all previously processed results.
///     What we can not implement by flipping the flags to test is a negation
///     of two sub-trees (because the negation affects all sub-trees emitted so
///     far, so the 2nd sub-tree we emit would also affect the first).
/// With those tools we can implement some OR operations:
///   - (OR (SETCC A) (SETCC B)) can be implemented via:
///     NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
///   - After transforming OR to NEG/AND combinations we may be able to use NEG
///     elimination rules from earlier to implement the whole thing as a
///     CCMP/FCCMP chain.
///
/// As complete example:
/// or (or (setCA (cmp A)) (setCB (cmp B)))
/// (and (setCC (cmp C)) (setCD (cmp D)))"
/// can be reassociated to:
/// or (and (setCC (cmp C)) setCD (cmp D))
///        (or (setCA (cmp A)) (setCB (cmp B)))
/// can be transformed to:
/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
/// which can be implemented as:
/// cmp C
/// ccmp D, inv(CD), CC
/// ccmp A, CA, inv(CD)
/// ccmp B, CB, inv(CA)
/// check for CB flags
///
/// A counterexample is "or (and A B) (and C D)" which translates to
/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
/// can only implement 1 of the inner (not) operations, but not both!
/// @{
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
///
/// \param LHS, RHS   Operands to compare. f16 operands are extended to f32
///                   when the subtarget lacks full FP16 support.
/// \param CC         The DAG condition being tested; only consulted to decide
///                   whether an integer (sub 0, x) RHS may become a CCMN.
/// \param CCOp       Flags produced by the previous comparison in the chain.
/// \param Predicate  Condition on \p CCOp that the node is predicated on.
/// \param OutCC      Condition the caller will test on the result. The NZCV
///                   immediate is chosen to satisfy the inverse of \p OutCC.
/// \returns the flags-producing conditional-compare node.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
                                         ISD::CondCode CC, SDValue CCOp,
                                         AArch64CC::CondCode Predicate,
                                         AArch64CC::CondCode OutCC,
                                         const SDLoc &DL, SelectionDAG &DAG) {
  unsigned Opcode = 0;
  const bool FullFP16 =
      static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();

  if (LHS.getValueType().isFloatingPoint()) {
    assert(LHS.getValueType() != MVT::f128);
    if (LHS.getValueType() == MVT::f16 && !FullFP16) {
      LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
      RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
    }
    Opcode = AArch64ISD::FCCMP;
  } else if (RHS.getOpcode() == ISD::SUB) {
    SDValue SubOp0 = RHS.getOperand(0);
    if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
      // See emitComparison() on why we can only do this for SETEQ and SETNE.
      Opcode = AArch64ISD::CCMN;
      RHS = RHS.getOperand(1);
    }
  }
  // Plain integer comparison if none of the special forms applied.
  if (Opcode == 0)
    Opcode = AArch64ISD::CCMP;

  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
/// expressed as a conjunction. See \ref AArch64CCMP.
/// \param CanNegate Set to true if we can negate the whole subtree just by
/// changing the conditions on the SETCC tests.
/// (this means we can call emitConjunctionRec() with
/// Negate==true on this subtree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
/// \param WillNegate Is true if are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
bool &MustBeFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val>getOpcode();
if (Opcode == ISD::SETCC) {
if (Val>getOperand(0).getValueType() == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
return true;
}
// Protect against exponential runtime and stack overflow.
if (Depth > 6)
return false;
if (Opcode == ISD::AND  Opcode == ISD::OR) {
bool IsOR = Opcode == ISD::OR;
SDValue O0 = Val>getOperand(0);
SDValue O1 = Val>getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
return false;
bool CanNegateR;
bool MustBeFirstR;
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
return false;
if (MustBeFirstL && MustBeFirstR)
return false;
if (IsOR) {
// For an OR expression we need to be able to naturally negate at least
// one side or we cannot do the transformation at all.
if (!CanNegateL && !CanNegateR)
return false;
// If we the result of the OR will be negated and we can naturally negate
// the leafs, then this subtree as a whole negates naturally.
CanNegate = WillNegate && CanNegateL && CanNegateR;
// If we cannot naturally negate the whole subtree, then this must be
// emitted first.
MustBeFirst = !CanNegate;
} else {
assert(Opcode == ISD::AND && "Must be OR or AND");
// We cannot naturally negate an AND operation.
CanNegate = false;
MustBeFirst = MustBeFirstL  MustBeFirstR;
}
return true;
}
return false;
}
/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
/// Tries to transform the given i1 producing node @p Val to a series compare
/// and conditional compare operations. @returns an NZCV flags producing node
/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
/// transformation was not possible.
/// \p Negate is true if we want this sub-tree being negated just by changing
/// SETCC conditions.
/// \p CCOp is the flags value of the chain emitted so far (empty when this is
/// the first comparison) and \p Predicate the condition on \p CCOp under
/// which the next comparison is performed.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
    AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
    AArch64CC::CondCode Predicate) {
  // We're at a tree leaf, produce a conditional comparison operation.
  unsigned Opcode = Val->getOpcode();
  if (Opcode == ISD::SETCC) {
    SDValue LHS = Val->getOperand(0);
    SDValue RHS = Val->getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
    bool isInteger = LHS.getValueType().isInteger();
    if (Negate)
      CC = getSetCCInverse(CC, isInteger);
    SDLoc DL(Val);
    // Determine OutCC and handle FP special case.
    if (isInteger) {
      OutCC = changeIntCCToAArch64CC(CC);
    } else {
      assert(LHS.getValueType().isFloatingPoint());
      AArch64CC::CondCode ExtraCC;
      changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
      // Some floating point conditions can't be tested with a single condition
      // code. Construct an additional comparison in this case.
      if (ExtraCC != AArch64CC::AL) {
        SDValue ExtraCmp;
        if (!CCOp.getNode())
          ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
        else
          ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
                                               ExtraCC, DL, DAG);
        // The comparison emitted below is chained onto the extra one.
        CCOp = ExtraCmp;
        Predicate = ExtraCC;
      }
    }

    // Produce a normal comparison if we are first in the chain
    if (!CCOp)
      return emitComparison(LHS, RHS, CC, DL, DAG);
    // Otherwise produce a ccmp.
    return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
                                     DAG);
  }
  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");

  bool IsOR = Opcode == ISD::OR;

  SDValue LHS = Val->getOperand(0);
  bool CanNegateL;
  bool MustBeFirstL;
  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
  assert(ValidL && "Valid conjunction/disjunction tree");
  (void)ValidL;

  SDValue RHS = Val->getOperand(1);
  bool CanNegateR;
  bool MustBeFirstR;
  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
  assert(ValidR && "Valid conjunction/disjunction tree");
  (void)ValidR;

  // Swap sub-tree that must come first to the right side.
  if (MustBeFirstL) {
    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
    std::swap(LHS, RHS);
    std::swap(CanNegateL, CanNegateR);
    std::swap(MustBeFirstL, MustBeFirstR);
  }

  bool NegateR;
  bool NegateAfterR;
  bool NegateL;
  bool NegateAfterAll;
  if (Opcode == ISD::OR) {
    // Swap the sub-tree that we can negate naturally to the left.
    if (!CanNegateL) {
      assert(CanNegateR && "at least one side must be negatable");
      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
      assert(!Negate);
      std::swap(LHS, RHS);
      NegateR = false;
      NegateAfterR = true;
    } else {
      // Negate the right sub-tree if possible, otherwise negate its result.
      NegateR = CanNegateR;
      NegateAfterR = !CanNegateR;
    }
    NegateL = true;
    NegateAfterAll = !Negate;
  } else {
    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
    assert(!Negate && "Valid conjunction/disjunction tree");

    NegateL = false;
    NegateR = false;
    NegateAfterR = false;
    NegateAfterAll = false;
  }

  // Emit sub-trees. The right one goes first; the left one is chained onto
  // its flags.
  AArch64CC::CondCode RHSCC;
  SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
  if (NegateAfterR)
    RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
  SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
  if (NegateAfterAll)
    OutCC = AArch64CC::getInvertedCondCode(OutCC);
  return CmpL;
}
/// Entry point for turning \p Val into a CMP followed by a chain of
/// conditional compares. See \ref AArch64CCMP and emitConjunctionRec().
///
/// \returns the flags-producing node and sets \p OutCC to the condition to
/// test on it, or an empty SDValue when \p Val is not a tree that
/// canEmitConjunction() accepts.
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
                               AArch64CC::CondCode &OutCC) {
  // Only the yes/no answer matters for the root; the two out-parameters are
  // required by the signature but not looked at afterwards.
  bool IgnoredCanNegate;
  bool IgnoredMustBeFirst;
  const bool Representable = canEmitConjunction(
      Val, IgnoredCanNegate, IgnoredMustBeFirst, /*WillNegate=*/false);

  if (Representable) {
    // The root starts an empty chain: not negated, no prior flags, and a
    // predicate of AL.
    return emitConjunctionRec(DAG, Val, OutCC, /*Negate=*/false, SDValue(),
                              AArch64CC::AL);
  }
  return SDValue();
}
/// @}
/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
///
/// The score is 0 when \p Op has more than one use or matches none of the
/// patterns, 1 for a lone supported extend or an in-range constant shift, and
/// 2 for an extend followed by a constant shift of at most 4.
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
  // A "supported extend" is a SIGN_EXTEND_INREG, or an AND whose constant
  // mask keeps exactly the low 8, 16 or 32 bits.
  auto isSupportedExtend = [&](SDValue V) {
    if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
      return true;

    if (V.getOpcode() == ISD::AND)
      if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = MaskCst->getZExtValue();
        return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
      }

    return false;
  };

  if (!Op.hasOneUse())
    return 0;

  if (isSupportedExtend(Op))
    return 1;

  unsigned Opc = Op.getOpcode();
  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
    if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      uint64_t Shift = ShiftCst->getZExtValue();
      if (isSupportedExtend(Op.getOperand(0)))
        return (Shift <= 4) ? 2 : 1;
      EVT VT = Op.getValueType();
      if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
        return 1;
    }

  return 0;
}
/// Emit the comparison of \p LHS and \p RHS under condition \p CC.
///
/// Before emitting, the comparison may be rewritten: an immediate that is not
/// encodable is nudged by one (with the condition adjusted to match), the
/// operands are swapped when the left one has the more foldable shift/extend,
/// a zero-extending i16 load compared for (in)equality with a suitable
/// constant is turned into a sign-extended compare, and an (in)equality test
/// of a boolean tree against 0 or 1 is turned into a conditional-compare
/// chain.
///
/// \param[out] AArch64cc  Constant holding the AArch64 condition code to test
///                        on the returned flags.
/// \returns the flags-producing value.
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &AArch64cc, SelectionDAG &DAG,
                             const SDLoc &dl) {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    EVT VT = RHS.getValueType();
    uint64_t C = RHSC->getZExtValue();
    if (!isLegalArithImmed(C)) {
      // Constant does not fit, try adjusting it by one?
      switch (CC) {
      default:
        break;
      case ISD::SETLT:
      case ISD::SETGE:
        // NOTE(review): the i64 guard compares against 0x80000000 rather than
        // the 64-bit minimum. For either value C - 1 is too large to be a
        // legal immediate, so the guard looks redundant here; confirm.
        if ((VT == MVT::i32 && C != 0x80000000 &&
             isLegalArithImmed((uint32_t)(C - 1))) ||
            (VT == MVT::i64 && C != 0x80000000ULL &&
             isLegalArithImmed(C - 1ULL))) {
          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETULT:
      case ISD::SETUGE:
        if ((VT == MVT::i32 && C != 0 &&
             isLegalArithImmed((uint32_t)(C - 1))) ||
            (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
          C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETLE:
      case ISD::SETGT:
        if ((VT == MVT::i32 && C != INT32_MAX &&
             isLegalArithImmed((uint32_t)(C + 1))) ||
            (VT == MVT::i64 && C != INT64_MAX &&
             isLegalArithImmed(C + 1ULL))) {
          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      case ISD::SETULE:
      case ISD::SETUGT:
        if ((VT == MVT::i32 && C != UINT32_MAX &&
             isLegalArithImmed((uint32_t)(C + 1))) ||
            (VT == MVT::i64 && C != UINT64_MAX &&
             isLegalArithImmed(C + 1ULL))) {
          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
          C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
          RHS = DAG.getConstant(C, dl, VT);
        }
        break;
      }
    }
  }

  // Comparisons are canonicalized so that the RHS operand is simpler than the
  // LHS one, the extreme case being when RHS is an immediate. However, AArch64
  // can fold some shift+extend operations on the RHS operand, so swap the
  // operands if that can be done.
  //
  // For example:
  //    lsl     w13, w11, #1
  //    cmp     w13, w12
  // can be turned into:
  //    cmp     w12, w11, lsl #1
  if (!isa<ConstantSDNode>(RHS) ||
      !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
    SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;

    if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
      std::swap(LHS, RHS);
      CC = ISD::getSetCCSwappedOperands(CC);
    }
  }

  SDValue Cmp;
  AArch64CC::CondCode AArch64CC;
  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
    const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);

    // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
    // For the i8 operand, the largest immediate is 255, so this can be easily
    // encoded in the compare instruction. For the i16 operand, however, the
    // largest immediate cannot be encoded in the compare.
    // Therefore, use a sign extending load and cmn to avoid materializing the
    // -1 constant. For example,
    // movz w1, #65535
    // ldrh w0, [x0, #0]
    // cmp w0, w1
    // >
    // ldrsh w0, [x0, #0]
    // cmn w0, #1
    // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
    // if and only if (sext LHS) == (sext RHS). The checks are in place to
    // ensure both the LHS and RHS are truly zero extended and to make sure the
    // transformation is profitable.
    if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
        cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
        cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
        LHS.getNode()->hasNUsesOfValue(1, 0)) {
      int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
      // The magnitude of the negative constant is what has to be encodable.
      if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
        SDValue SExt =
            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
                        DAG.getValueType(MVT::i16));
        Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
                                                   RHS.getValueType()),
                             CC, dl, DAG);
        AArch64CC = changeIntCCToAArch64CC(CC);
      }
    }

    if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
      if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
        // "!= 1" and "== 0" both ask for the tree to be false.
        if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
          AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
      }
    }
  }

  // None of the special forms applied: emit a plain comparison.
  if (!Cmp) {
    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
    AArch64CC = changeIntCCToAArch64CC(CC);
  }
  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
  return Cmp;
}
/// Lower an i32/i64 arithmetic-with-overflow node \p Op (SADDO, UADDO, SSUBO,
/// USUBO, SMULO or UMULO).
///
/// \param[out] CC  Condition code that, tested on the returned flags,
///                 indicates overflow.
/// \returns a pair (Value, Overflow): the arithmetic result and the
/// flags-producing value.
static std::pair<SDValue, SDValue>
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
         "Unsupported value type");
  SDValue Value, Overflow;
  SDLoc DL(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // Stays 0 for the multiplies, which build Value/Overflow themselves.
  unsigned Opc = 0;
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Unknown overflow instruction!");
  case ISD::SADDO:
    Opc = AArch64ISD::ADDS;
    CC = AArch64CC::VS;
    break;
  case ISD::UADDO:
    Opc = AArch64ISD::ADDS;
    CC = AArch64CC::HS;
    break;
  case ISD::SSUBO:
    Opc = AArch64ISD::SUBS;
    CC = AArch64CC::VS;
    break;
  case ISD::USUBO:
    Opc = AArch64ISD::SUBS;
    CC = AArch64CC::LO;
    break;
  // Multiply needs a little bit extra work.
  case ISD::SMULO:
  case ISD::UMULO: {
    CC = AArch64CC::NE;
    bool IsSigned = Op.getOpcode() == ISD::SMULO;
    if (Op.getValueType() == MVT::i32) {
      unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      // For a 32 bit multiply with overflow check we want the instruction
      // selector to generate a widening multiply (SMADDL/UMADDL). For that we
      // need to generate the following pattern:
      // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
      LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
      RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
      SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
      SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
                                DAG.getConstant(0, DL, MVT::i64));
      // On AArch64 the upper 32 bits are always zero extended for a 32 bit
      // operation. We need to clear out the upper 32 bits, because we used a
      // widening multiply that wrote all 64 bits. In the end this should be a
      // noop.
      Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
      if (IsSigned) {
        // The signed overflow check requires more than just a simple check for
        // any bit set in the upper 32 bits of the result. These bits could be
        // just the sign bits of a negative number. To perform the overflow
        // check we have to arithmetic shift right the 32nd bit of the result by
        // 31 bits. Then we compare the result to the upper 32 bits.
        SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
                                        DAG.getConstant(32, DL, MVT::i64));
        UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
        SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
                                        DAG.getConstant(31, DL, MVT::i64));
        // It is important that LowerBits is last, otherwise the arithmetic
        // shift will not be folded into the compare (SUBS).
        SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
        Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
                       .getValue(1);
      } else {
        // The overflow check for unsigned multiply is easy. We only need to
        // check if any of the upper 32 bits are set. This can be done with a
        // CMP (shifted register). For that we need to generate the following
        // pattern:
        // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
        SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
                                        DAG.getConstant(32, DL, MVT::i64));
        SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
        Overflow =
            DAG.getNode(AArch64ISD::SUBS, DL, VTs,
                        DAG.getConstant(0, DL, MVT::i64),
                        UpperBits).getValue(1);
      }
      break;
    }
    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
    // For the 64 bit multiply
    Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
    if (IsSigned) {
      SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
      SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
                                      DAG.getConstant(63, DL, MVT::i64));
      // It is important that LowerBits is last, otherwise the arithmetic
      // shift will not be folded into the compare (SUBS).
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
      Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
                     .getValue(1);
    } else {
      SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
      Overflow =
          DAG.getNode(AArch64ISD::SUBS, DL, VTs,
                      DAG.getConstant(0, DL, MVT::i64),
                      UpperBits).getValue(1);
    }
    break;
  }
  } // switch (...)

  if (Opc) {
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);

    // Emit the AArch64 operation with overflow check.
    Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
    Overflow = Value.getValue(1);
  }
  return std::make_pair(Value, Overflow);
}
/// Replace \p Op by a call to the runtime library routine \p Call, passing
/// all operands of \p Op as arguments and using default call options.
/// \returns the first element of the pair produced by makeLibCall().
SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
                                             RTLIB::Libcall Call) const {
  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
}
// Returns true if the given Op is the overflow flag result of an overflow
// intrinsic operation, i.e. result number 1 of a SADDO, UADDO, SSUBO, USUBO,
// SMULO or UMULO node.
static bool isOverflowIntrOpRes(SDValue Op) {
  unsigned Opc = Op.getOpcode();
  return (Op.getResNo() == 1 &&
          (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
           Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
}
/// Custom lowering for XOR. Two patterns are folded into a CSEL:
///   - xor of an overflow-intrinsic flag with 1, and
///   - xor of a value with an integer SELECT_CC that yields 0 or all-ones.
/// \returns the replacement node, \p Op itself when neither pattern matches,
/// or an empty SDValue when the overflow operation's type is not legal yet.
static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Sel = Op.getOperand(0);
  SDValue Other = Op.getOperand(1);
  SDLoc dl(Sel);

  // If the operand is an overflow checking operation, invert the condition
  // code and kill the Not operation. I.e., transform:
  // (xor (overflow_op_bool, 1))
  //   -->
  // (csel 1, 0, invert(cc), overflow_op_bool)
  // ... which later gets transformed to just a cset instruction with an
  // inverted condition code, rather than a cset + eor sequence.
  if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
    // Only lower legal XALUO ops.
    if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
      return SDValue();

    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
    AArch64CC::CondCode CC;
    SDValue Value, Overflow;
    std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
    return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
                       CCVal, Overflow);
  }
  // If neither operand is a SELECT_CC, give up.
  if (Sel.getOpcode() != ISD::SELECT_CC)
    std::swap(Sel, Other);
  if (Sel.getOpcode() != ISD::SELECT_CC)
    return Op;

  // The folding we want to perform is:
  // (xor x, (select_cc a, b, cc, 0, -1) )
  //   -->
  // (csel x, (xor x, -1), cc ...)
  //
  // The latter will get matched to a CSINV instruction.

  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
  SDValue LHS = Sel.getOperand(0);
  SDValue RHS = Sel.getOperand(1);
  SDValue TVal = Sel.getOperand(2);
  SDValue FVal = Sel.getOperand(3);

  // FIXME: This could be generalized to non-integer comparisons.
  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
    return Op;

  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);

  // The values aren't constants, this isn't the pattern we're looking for.
  if (!CFVal || !CTVal)
    return Op;

  // We can commute the SELECT_CC by inverting the condition.  This
  // might be needed to make this fit into a CSINV pattern.
  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
    std::swap(TVal, FVal);
    std::swap(CTVal, CFVal);
    CC = ISD::getSetCCInverse(CC, true);
  }

  // If the constants line up, perform the transform!
  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
    SDValue CCVal;
    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);

    // Select between x and its bitwise complement (x xor all-ones).
    FVal = Other;
    TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
                       DAG.getConstant(-1ULL, dl, Other.getValueType()));

    return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
                       CCVal, Cmp);
  }

  return Op;
}
/// Lower ADDC / SUBC / ADDE / SUBE to ADDS / SUBS / ADCS / SBCS respectively.
/// The "E" forms pass their third operand through as well. An empty SDValue
/// is returned when the result type is not legal, leaving the node to be
/// expanded by the legalizer.
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  // Results: the arithmetic value in VT plus an i32.
  SDVTList ResultTys = DAG.getVTList(VT, MVT::i32);

  unsigned TargetOpc;
  switch (Op.getOpcode()) {
  case ISD::ADDC: TargetOpc = AArch64ISD::ADDS; break;
  case ISD::SUBC: TargetOpc = AArch64ISD::SUBS; break;
  case ISD::ADDE: TargetOpc = AArch64ISD::ADCS; break;
  case ISD::SUBE: TargetOpc = AArch64ISD::SBCS; break;
  default:
    llvm_unreachable("Invalid code");
  }

  // Only ADCS and SBCS take the extra third operand.
  const bool TakesThirdOperand =
      TargetOpc == AArch64ISD::ADCS || TargetOpc == AArch64ISD::SBCS;

  if (TakesThirdOperand)
    return DAG.getNode(TargetOpc, SDLoc(Op), ResultTys, Op.getOperand(0),
                       Op.getOperand(1), Op.getOperand(2));
  return DAG.getNode(TargetOpc, SDLoc(Op), ResultTys, Op.getOperand(0),
                     Op.getOperand(1));
}
/// Lower an arithmetic-with-overflow node into the flag-setting operation
/// returned by getAArch64XALUOOp plus a CSEL that materialises the overflow
/// indication as an i32 0/1 value.
///
/// \returns MERGE_VALUES(arithmetic result, overflow value), or an empty
/// SDValue when Op's value type is not legal so that legalize expands it.
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
  const EVT ResultVT = Op.getValueType();

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(ResultVT))
    return SDValue();

  SDLoc DL(Op);

  // The helper yields the actual operation that sets the overflow or carry
  // flag, and reports through OverflowCC which condition signals overflow.
  AArch64CC::CondCode OverflowCC;
  SDValue Arith, Flags;
  std::tie(Arith, Flags) = getAArch64XALUOOp(OverflowCC, Op, DAG);

  // 1 stands for "overflowed", 0 for "did not overflow".
  SDValue One = DAG.getConstant(1, DL, MVT::i32);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);

  // Both the condition and the select operands are inverted. This allows the
  // select to be matched to a single instruction:
  //   CSINC Wd, WZR, WZR, invert(cond).
  SDValue InvertedCC =
      DAG.getConstant(getInvertedCondCode(OverflowCC), DL, MVT::i32);
  SDValue OverflowBit = DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, Zero, One,
                                    InvertedCC, Flags);

  return DAG.getNode(ISD::MERGE_VALUES, DL,
                     DAG.getVTList(ResultVT, MVT::i32), Arith, OverflowBit);
}
// Lower ISD::PREFETCH to AArch64ISD::PREFETCH with an immediate operand that
// packs the requested behaviour.
//
// Prefetch operands are:
// 0: Chain (forwarded unchanged as the first operand of the new node)
// 1: Address to prefetch
// 2: bool isWrite
// 3: int locality (0 = no locality ... 3 = extreme locality)
// 4: bool isDataCache
//
// Layout of the packed immediate, as built below:
//   bit 4    : isWrite
//   bit 3    : !isDataCache
//   bits 2:1 : cache level (3 - locality, or 0 when locality is 0)
//   bit 0    : stream (set when locality is 0)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();

  // Locality 0 means "no locality": request a streaming prefetch.
  bool IsStream = !Locality;
  // When the locality number is set
  if (Locality) {
    // The front-end should have filtered out the out-of-range values
    assert(Locality <= 3 && "Prefetch locality outofrange");
    // The locality degree is the opposite of the cache speed.
    // Put the number the other way around.
    // The encoding starts at 0 for level 1
    Locality = 3 - Locality;
  }

  // Build the mask value encoding the expected behavior.
  unsigned PrfOp = (IsWrite << 4) |     // Load/Store bit
                   (!IsData << 3) |     // IsDataCache bit
                   (Locality << 1) |    // Cache level bits
                   (unsigned)IsStream;  // Stream bit
  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
                     DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
/// Lower an FP_EXTEND whose result is f128 into the runtime library call
/// selected by RTLIB::getFPEXT for the (source, destination) type pair.
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
                                              SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");

  const EVT SrcVT = Op.getOperand(0).getValueType();
  const EVT DstVT = Op.getValueType();
  return LowerF128Call(Op, DAG, RTLIB::getFPEXT(SrcVT, DstVT));
}
/// Lower FP_ROUND. Only a rounding whose source is f128 needs work; every
/// other source type is returned unchanged. The f128 case becomes the library
/// call chosen by RTLIB::getFPROUND.
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Src = Op.getOperand(0);
  const EVT SrcVT = Src.getValueType();

  // It's legal except when f128 is involved.
  if (SrcVT != MVT::f128)
    return Op;

  const EVT DstVT = Op.getValueType();
  RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);

  // FP_ROUND carries a second operand saying whether the rounding is known to
  // be precise. It is not an argument of the library call, so only the source
  // value is passed and LowerF128Call cannot be used directly.
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, LC, DstVT, Src, CallOptions, SDLoc(Op)).first;
}
/// Custom-lower a vector FP_TO_SINT / FP_TO_UINT.
///
/// Cases, in order:
///  * f16 elements without full FP16 support: extend the input to f32
///    elements and redo the conversion on that.
///  * result narrower than input: convert to an integer vector with the
///    input's element width, then TRUNCATE.
///  * result wider than input: FP_EXTEND the input to the result's element
///    width, then convert.
///  * same size: return \p Op unchanged.
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
  // Any additional optimization in this function should be recorded
  // in the cost tables.
  EVT InVT = Op.getOperand(0).getValueType();
  EVT VT = Op.getValueType();
  unsigned NumElts = InVT.getVectorNumElements();

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (InVT.getVectorElementType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
    SDLoc dl(Op);
    return DAG.getNode(
        Op.getOpcode(), dl, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
  }

  // Narrowing: convert at the input width first, then truncate the integers.
  if (VT.getSizeInBits() < InVT.getSizeInBits()) {
    SDLoc dl(Op);
    SDValue Cv =
        DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
                    Op.getOperand(0));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
  }

  // Widening: extend the floating-point input first, then convert.
  if (VT.getSizeInBits() > InVT.getSizeInBits()) {
    SDLoc dl(Op);
    MVT ExtVT =
        MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
                         VT.getVectorNumElements());
    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
    return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
  }

  // Type changing conversions are illegal.
  return Op;
}
/// Custom-lower FP_TO_SINT / FP_TO_UINT.
///
/// Vector inputs are forwarded to LowerVectorFP_TO_INT. A scalar f16 input is
/// extended to f32 first when full FP16 is unavailable. Any other non-f128
/// input is returned unchanged. An f128 input becomes the library call chosen
/// by RTLIB::getFPTOSINT / RTLIB::getFPTOUINT.
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
                                              SelectionDAG &DAG) const {
  if (Op.getOperand(0).getValueType().isVector())
    return LowerVectorFP_TO_INT(Op, DAG);

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (Op.getOperand(0).getValueType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    SDLoc dl(Op);
    return DAG.getNode(
        Op.getOpcode(), dl, Op.getValueType(),
        DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
  }

  if (Op.getOperand(0).getValueType() != MVT::f128) {
    // It's legal except when f128 is involved
    return Op;
  }

  RTLIB::Libcall LC;
  if (Op.getOpcode() == ISD::FP_TO_SINT)
    LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
  else
    LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());

  // Every operand of the node is passed on as a library call argument.
  SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
  MakeLibCallOptions CallOptions;
  return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first;
}
/// Custom-lower a vector SINT_TO_FP / UINT_TO_FP whose source and result
/// vectors differ in total size.
///
///  * result narrower than source: convert to a floating-point vector with
///    the source's element width, then FP_ROUND to the result type.
///  * result wider than source: sign- or zero-extend the source (matching the
///    signedness of the conversion) to the result's element width, then
///    convert.
///  * same size: \p Op is returned unchanged.
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
  // Any additional optimization in this function should be recorded
  // in the cost tables.
  SDLoc DL(Op);
  const unsigned ConvOpc = Op.getOpcode();
  EVT ResVT = Op.getValueType();
  SDValue Src = Op.getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (ResVT.getSizeInBits() < SrcVT.getSizeInBits()) {
    MVT SameWidthFPVT =
        MVT::getVectorVT(MVT::getFloatingPointVT(SrcVT.getScalarSizeInBits()),
                         SrcVT.getVectorNumElements());
    SDValue Converted = DAG.getNode(ConvOpc, DL, SameWidthFPVT, Src);
    return DAG.getNode(ISD::FP_ROUND, DL, ResVT, Converted,
                       DAG.getIntPtrConstant(0, DL));
  }

  if (ResVT.getSizeInBits() > SrcVT.getSizeInBits()) {
    unsigned ExtendOpc = ISD::ZERO_EXTEND;
    if (ConvOpc == ISD::SINT_TO_FP)
      ExtendOpc = ISD::SIGN_EXTEND;
    EVT WideIntVT = ResVT.changeVectorElementTypeToInteger();
    SDValue Widened = DAG.getNode(ExtendOpc, DL, WideIntVT, Src);
    return DAG.getNode(ConvOpc, DL, ResVT, Widened);
  }

  return Op;
}
/// Custom-lower SINT_TO_FP / UINT_TO_FP.
///
/// Vector results go to LowerVectorINT_TO_FP. An f16 result without full FP16
/// support is produced by converting to f32 and rounding. An i128 source
/// returns an empty SDValue. Any other non-f128 result is returned unchanged.
/// An f128 result becomes the library call chosen by RTLIB::getSINTTOFP /
/// RTLIB::getUINTTOFP.
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
                                              SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorINT_TO_FP(Op, DAG);

  // f16 conversions are promoted to f32 when full fp16 is not supported.
  if (Op.getValueType() == MVT::f16 &&
      !Subtarget->hasFullFP16()) {
    SDLoc dl(Op);
    return DAG.getNode(
        ISD::FP_ROUND, dl, MVT::f16,
        DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
        DAG.getIntPtrConstant(0, dl));
  }

  // i128 conversions are libcalls.
  if (Op.getOperand(0).getValueType() == MVT::i128)
    return SDValue();

  // Other conversions are legal, unless it's to the completely software-based
  // fp128.
  if (Op.getValueType() != MVT::f128)
    return Op;

  RTLIB::Libcall LC;
  if (Op.getOpcode() == ISD::SINT_TO_FP)
    LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
  else
    LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());

  return LowerF128Call(Op, DAG, LC);
}
/// Lower FSINCOS to a call to the SINCOS_STRET_F32 / SINCOS_STRET_F64 library
/// routine (the f64 variant when the argument is f64, the f32 variant
/// otherwise).
///
/// The call uses the Fast calling convention, is chained on the DAG entry
/// node, and returns a struct of two values of the argument's type. The first
/// element of LowerCallTo's result pair is returned.
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
                                            SelectionDAG &DAG) const {
  // For iOS, we want to call an alternative entry point: __sincos_stret,
  // which returns the values in two S / D registers.
  SDLoc DL(Op);
  SDValue Operand = Op.getOperand(0);
  EVT OperandVT = Operand.getValueType();
  Type *OperandTy = OperandVT.getTypeForEVT(*DAG.getContext());

  // Single argument, passed without sign or zero extension.
  ArgListTy CallArgs;
  ArgListEntry ArgEntry;
  ArgEntry.Node = Operand;
  ArgEntry.Ty = OperandTy;
  ArgEntry.IsSExt = false;
  ArgEntry.IsZExt = false;
  CallArgs.push_back(ArgEntry);

  RTLIB::Libcall LC = RTLIB::SINCOS_STRET_F32;
  if (OperandVT == MVT::f64)
    LC = RTLIB::SINCOS_STRET_F64;

  const char *SymbolName = getLibcallName(LC);
  SDValue Callee =
      DAG.getExternalSymbol(SymbolName, getPointerTy(DAG.getDataLayout()));

  StructType *ResultTy = StructType::get(OperandTy, OperandTy);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::Fast, ResultTy, Callee, std::move(CallArgs));

  return LowerCallTo(CLI).first;
}
/// Custom-lower a BITCAST that produces f16 from an i16 operand.
///
/// The i16 value is any-extended to i32, bitcast to f32, and the f16 result is
/// taken from that with an EXTRACT_SUBREG of the `hsub` sub-register index.
/// Bitcasts to any other type return an empty SDValue.
static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
  if (Op.getValueType() != MVT::f16)
    return SDValue();

  SDValue Src = Op.getOperand(0);
  assert(Src.getValueType() == MVT::i16);

  SDLoc DL(Op);
  SDValue Widened = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
  SDValue AsF32 = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Widened);
  SDNode *Extract =
      DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, AsF32,
                         DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32));
  return SDValue(Extract, 0);
}
/// Return the vector type a narrow vector should be extended to so that it
/// occupies 64 bits.
///
/// Types that are already 64 bits or larger are returned as-is. Otherwise
/// v2i8 and v2i16 map to v2i32, and v4i8 maps to v4i16; any other type is a
/// programming error.
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
  if (OrigVT.getSizeInBits() >= 64)
    return OrigVT;

  assert(OrigVT.isSimple() && "Expecting a simple value type");

  const MVT::SimpleValueType NarrowTy = OrigVT.getSimpleVT().SimpleTy;
  if (NarrowTy == MVT::v2i8 || NarrowTy == MVT::v2i16)
    return MVT::v2i32;
  if (NarrowTy == MVT::v4i8)
    return MVT::v4i16;

  llvm_unreachable("Unexpected Vector Type");
}
/// Make sure \p N is at least 64 bits wide.
///
/// \p N originally had type \p OrigTy and was extended (by \p ExtOpcode) to
/// \p ExtTy, which is expected to be a 128-bit vector. When \p OrigTy is
/// narrower than 64 bits, a fresh \p ExtOpcode node is emitted that widens
/// \p N to the type chosen by getExtensionTo64Bits; otherwise \p N is
/// returned untouched.
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
                                                 const EVT &OrigTy,
                                                 const EVT &ExtTy,
                                                 unsigned ExtOpcode) {
  assert(ExtTy.is128BitVector() && "Unexpected extension size");

  // Already wide enough: nothing to insert.
  if (OrigTy.getSizeInBits() >= 64)
    return N;

  // Must extend size to at least 64 bits to be used as an operand for VMULL.
  EVT WidenedVT = getExtensionTo64Bits(OrigTy);
  SDLoc DL(N);
  return DAG.getNode(ExtOpcode, DL, WidenedVT, N);
}
/// Return true when \p N is a BUILD_VECTOR whose operands are all integer
/// constants that fit in half of the vector's element width, interpreted as
/// signed (\p isSigned) or unsigned values.
///
/// Any non-constant operand, or any other opcode, yields false. \p DAG is not
/// used.
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
                                   bool isSigned) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  // The half element width is the same for every operand; compute it once.
  const unsigned HalfSize = N->getValueType(0).getScalarSizeInBits() / 2;

  for (const SDValue &Elt : N->op_values()) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
    if (!C)
      return false;

    if (isSigned) {
      if (!isIntN(HalfSize, C->getSExtValue()))
        return false;
    } else {
      if (!isUIntN(HalfSize, C->getZExtValue()))
        return false;
    }
  }

  return true;
}
/// Produce the narrow operand to use in place of the extended node \p N.
///
/// For SIGN_EXTEND / ZERO_EXTEND, the extend's own operand is used, widened
/// to 64 bits when needed via addRequiredExtensionForVectorMULL. Otherwise
/// \p N must be a BUILD_VECTOR of constants, which is rebuilt as a vector
/// whose element type is half as wide.
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
    return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
                                             N->getOperand(0)->getValueType(0),
                                             N->getValueType(0),
                                             N->getOpcode());

  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
  EVT VT = N->getValueType(0);
  SDLoc dl(N);
  unsigned EltSize = VT.getScalarSizeInBits() / 2;
  unsigned NumElts = VT.getVectorNumElements();
  MVT TruncVT = MVT::getIntegerVT(EltSize);
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0; i != NumElts; ++i) {
    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
    const APInt &CInt = C->getAPIntValue();
    // Element types smaller than 32 bits are not legal, so use i32 elements.
    // The values are implicitly truncated so sext vs. zext doesn't matter.
    Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
  }
  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
/// True when \p N is a SIGN_EXTEND node, or a constant BUILD_VECTOR accepted
/// by isExtendedBUILD_VECTOR in signed mode.
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
  return N->getOpcode() == ISD::SIGN_EXTEND ||
         isExtendedBUILD_VECTOR(N, DAG, true);
}
/// True when \p N is a ZERO_EXTEND node, or a constant BUILD_VECTOR accepted
/// by isExtendedBUILD_VECTOR in unsigned mode.
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
  return N->getOpcode() == ISD::ZERO_EXTEND ||
         isExtendedBUILD_VECTOR(N, DAG, false);
}
/// True when \p N is an ADD or SUB whose two operands each have a single use
/// and both satisfy isSignExtended.
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    SDNode *N0 = N->getOperand(0).getNode();
    SDNode *N1 = N->getOperand(1).getNode();
    return N0->hasOneUse() && N1->hasOneUse() &&
           isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
  }
  return false;
}
/// True when \p N is an ADD or SUB whose two operands each have a single use
/// and both satisfy isZeroExtended.
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    SDNode *N0 = N->getOperand(0).getNode();
    SDNode *N1 = N->getOperand(1).getNode();
    return N0->hasOneUse() && N1->hasOneUse() &&
           isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
  }
  return false;
}
/// Lower FLT_ROUNDS_ by reading FPCR through the aarch64_get_fpcr intrinsic
/// and remapping the 2-bit field at bits 23:22.
///
/// The hardware field value to FLT_ROUNDS value mapping is
/// 0->1, 1->2, 2->3, 3->0, i.e. (field + 1) mod 4. It is computed as
///   ((FPCR + (1 << 22)) >> 22) & 3
/// so that the shift and the mask can fold into a bitfield extract.
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Read the 64-bit control register and keep its low 32 bits.
  SDValue IntrinsicID =
      DAG.getConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64);
  SDValue FPCR =
      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, IntrinsicID);
  SDValue FPCRLow = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPCR);

  // Add one to the field in place, before moving it down to bit 0.
  SDValue FieldOne = DAG.getConstant(1U << 22, DL, MVT::i32);
  SDValue Incremented = DAG.getNode(ISD::ADD, DL, MVT::i32, FPCRLow, FieldOne);

  SDValue FieldShift = DAG.getConstant(22, DL, MVT::i32);
  SDValue Shifted =
      DAG.getNode(ISD::SRL, DL, MVT::i32, Incremented, FieldShift);

  // Keep only the two field bits; this also discards the carry out of the
  // field, giving the wrap-around from 3 to 0.
  SDValue FieldMask = DAG.getConstant(3, DL, MVT::i32);
  return DAG.getNode(ISD::AND, DL, MVT::i32, Shifted, FieldMask);
}
/// Custom-lower a 128-bit integer vector MUL, looking for operands that are
/// sign/zero extensions so that SMULL / UMULL can be used.
///
/// Outcomes:
///  * both operands sign-extended  -> SMULL of the narrow operands
///  * both operands zero-extended  -> UMULL of the narrow operands
///  * (ext A +/- ext B) * ext C    -> (MULL A, C) +/- (MULL B, C)
///  * otherwise: an empty SDValue for v2i64, or \p Op unchanged for other
///    types.
static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
  // Multiplications are only custom-lowered for 128-bit vectors so that
  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
  EVT VT = Op.getValueType();
  assert(VT.is128BitVector() && VT.isInteger() &&
         "unexpected type for customlowering ISD::MUL");
  SDNode *N0 = Op.getOperand(0).getNode();
  SDNode *N1 = Op.getOperand(1).getNode();
  unsigned NewOpc = 0;
  bool isMLA = false;
  bool isN0SExt = isSignExtended(N0, DAG);
  bool isN1SExt = isSignExtended(N1, DAG);
  if (isN0SExt && isN1SExt)
    NewOpc = AArch64ISD::SMULL;
  else {
    bool isN0ZExt = isZeroExtended(N0, DAG);
    bool isN1ZExt = isZeroExtended(N1, DAG);

    if (isN0ZExt && isN1ZExt)
      NewOpc = AArch64ISD::UMULL;
    else if (isN1SExt || isN1ZExt) {
      // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
      // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
      if (isN1SExt && isAddSubSExt(N0, DAG)) {
        NewOpc = AArch64ISD::SMULL;
        isMLA = true;
      } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
        NewOpc = AArch64ISD::UMULL;
        isMLA = true;
      } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
        // NOTE(review): this arm is only reached when N1 is an extend or a
        // constant BUILD_VECTOR (outer condition), while isAddSubZExt(N1)
        // requires N1 to be an ADD or SUB -- it looks unreachable as written.
        // Confirm whether the outer condition should also cover N0.
        std::swap(N0, N1);
        NewOpc = AArch64ISD::UMULL;
        isMLA = true;
      }
    }

    if (!NewOpc) {
      if (VT == MVT::v2i64)
        // Fall through to expand this.  It is not legal.
        return SDValue();
      else
        // Other vector multiplications are legal.
        return Op;
    }
  }

  // Legalize to a S/UMULL instruction
  SDLoc DL(Op);
  SDValue Op0;
  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
  if (!isMLA) {
    Op0 = skipExtensionForVectorMULL(N0, DAG);
    assert(Op0.getValueType().is64BitVector() &&
           Op1.getValueType().is64BitVector() &&
           "unexpected types for extended operands to VMULL");
    return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
  }

  // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
  // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
  // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
  EVT Op1VT = Op1.getValueType();
  // N0's own opcode (ADD or SUB) recombines the two partial products.
  return DAG.getNode(N0->getOpcode(), DL, VT,
                     DAG.getNode(NewOpc, DL, VT,
                                 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
                     DAG.getNode(NewOpc, DL, VT,
                                 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
/// Custom-lower selected chainless intrinsics; operand 0 of \p Op is the
/// intrinsic ID.
///
/// Handled here: thread_pointer, aarch64_neon_abs,
/// aarch64_neon_{s,u}{max,min}, localaddress and eh_recoverfp. Every other
/// intrinsic returns an empty SDValue.
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                       SelectionDAG &DAG) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDLoc dl(Op);
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::aarch64_neon_abs: {
    EVT Ty = Op.getValueType();
    if (Ty == MVT::i64) {
      // Scalar i64: route through v1i64 so that ISD::ABS operates on a vector.
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
                                   Op.getOperand(1));
      Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
      return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
    } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
      return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
    } else {
      report_fatal_error("Unexpected type for AArch64 NEON intrinic");
    }
  }
  // The NEON min/max intrinsics map directly onto the generic ISD nodes.
  case Intrinsic::aarch64_neon_smax:
    return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_umax:
    return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_smin:
    return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::aarch64_neon_umin:
    return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));

  case Intrinsic::localaddress: {
    // Read the register the register info designates for local addresses.
    const auto &MF = DAG.getMachineFunction();
    const auto *RegInfo = Subtarget->getRegisterInfo();
    unsigned Reg = RegInfo->getLocalAddressRegister(MF);
    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
                              Op.getSimpleValueType());
  }

  case Intrinsic::eh_recoverfp: {
    // FIXME: This needs to be implemented to correctly handle highly aligned
    // stack objects. For now we simply return the incoming FP. Refer D53541
    // for more details.
    SDValue FnOp = Op.getOperand(1);
    SDValue IncomingFPOp = Op.getOperand(2);
    GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
    auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
    if (!Fn)
      report_fatal_error(
          "llvm.eh.recoverfp must take a function as the first argument");
    return IncomingFPOp;
  }
  }
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
//
// DL    - debug location for the new nodes.
// ST    - the truncating store being lowered.
// VT    - type of the stored value; must be v4i16.
// MemVT - type written to memory; must be v4i8.
//
// Returns a store of the i32 lane that holds the four truncated bytes, using
// ST's chain, base pointer and memory operand.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
                                        EVT VT, EVT MemVT,
                                        SelectionDAG &DAG) {
  assert(VT.isVector() && "VT should be a vector type");
  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);

  SDValue Value = ST->getValue();

  // It first extend the promoted v4i16 to v8i16, truncate to v8i8, and extract
  // the word lane which represent the v4i8 subvector.  It optimizes the store
  // to:
  //
  //   xtn  v0.8b, v0.8h
  //   str  s0, [x0]

  SDValue Undef = DAG.getUNDEF(MVT::i16);
  SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
                                        {Undef, Undef, Undef, Undef});

  // The upper half of the widened vector is undef; only lane 0 of the
  // v2i32 view below is stored.
  SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
                                 Value, UndefVec);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);

  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
  SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
                                     Trunc, DAG.getConstant(0, DL, MVT::i64));

  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
                      ST->getBasePtr(), ST->getMemOperand());
}
// Custom lowering for vector stores (the stored value must be a vector).
//
//  * A store whose alignment is below the memory type's store size, and for
//    which allowsMisalignedMemoryAccesses says no, is scalarized.
//  * A truncating store is handed to LowerTruncateVectorStore, which only
//    supports v4i16 -> v4i8.
//  * Anything else returns an empty SDValue.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc Dl(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  assert (StoreNode && "Can only custom lower store nodes");

  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();

  assert (VT.isVector() && "Can only custom lower vector store types");

  unsigned AS = StoreNode->getAddressSpace();
  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(
          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
    return scalarizeVectorStore(StoreNode, DAG);
  }

  if (StoreNode->isTruncatingStore()) {
    return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
  }

  return SDValue();
}
/// Entry point for custom lowering: dispatch on Op's opcode to the matching
/// Lower* routine and return whatever that routine returns.
///
/// In debug output the node being lowered is printed first. An opcode with no
/// case below reaches llvm_unreachable.
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
LLVM_DEBUG(Op.dump());
switch (Op.getOpcode()) {
default:
llvm_unreachable("unimplemented operand");
return SDValue();
case ISD::BITCAST:
return LowerBITCAST(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
return LowerGlobalTLSAddress(Op, DAG);
case ISD::SETCC:
return LowerSETCC(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::SELECT:
return LowerSELECT(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::BR_JT:
return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
return LowerBlockAddress(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
case ISD::VACOPY:
return LowerVACOPY(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
// Add/subtract with carry: all four opcodes share one lowering routine.
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE:
return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
// Arithmetic with overflow: all six opcodes share one lowering routine.
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
return LowerXALUO(Op, DAG);
// Floating-point arithmetic is forwarded to LowerF128Call with the
// corresponding *_F128 library call.
case ISD::FADD:
return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
case ISD::FSUB:
return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
case ISD::FMUL:
return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
case ISD::FDIV:
return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
case ISD::SPONENTRY:
return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR:
return LowerADDROFRETURNADDR(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
return LowerVectorSRA_SRL_SHL(Op, DAG);
case ISD::SHL_PARTS:
return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
return LowerShiftRightParts(Op, DAG);
case ISD::CTPOP:
return LowerCTPOP(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
return LowerVectorOR(Op, DAG);
case ISD::XOR:
return LowerXOR(Op, DAG);
case ISD::PREFETCH:
return LowerPREFETCH(Op, DAG);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
case ISD::FLT_ROUNDS_:
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::STORE:
return LowerSTORE(Op, DAG);
// Vector reductions: all listed opcodes share one lowering routine.
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
case ISD::ATOMIC_LOAD_SUB:
return LowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
}
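//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//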
/// Selects the correct CCAssignFn for a given CallingConvention value.
///
/// \param CC       the calling convention of the call.
/// \param IsVarArg whether the callee is variadic; this changes the choice on
///                 Windows and Darwin targets and for the Win64 convention.
///
/// Calling conventions without a case below are a fatal error.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                     bool IsVarArg) const {
  switch (CC) {
  default:
    report_fatal_error("Unsupported calling convention.");
  case CallingConv::WebKit_JS:
    return CC_AArch64_WebKit_JS;
  case CallingConv::GHC:
    return CC_AArch64_GHC;
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::PreserveMost:
  case CallingConv::CXX_FAST_TLS:
  case CallingConv::Swift:
    // Order matters: the Windows variadic check comes first, then every
    // non-Darwin target uses AAPCS, and only Darwin reaches the rest.
    if (Subtarget->isTargetWindows() && IsVarArg)
      return CC_AArch64_Win64_VarArg;
    if (!Subtarget->isTargetDarwin())
      return CC_AArch64_AAPCS;
    if (!IsVarArg)
      return CC_AArch64_DarwinPCS;
    return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
                                      : CC_AArch64_DarwinPCS_VarArg;
  case CallingConv::Win64:
    return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
  case CallingConv::AArch64_VectorCall:
    return CC_AArch64_AAPCS;
  }
}
/// Selects the CCAssignFn used for return values: the WebKit_JS return
/// convention for CallingConv::WebKit_JS, and the AAPCS return convention for
/// every other calling convention.
CCAssignFn *
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return RetCC_AArch64_WebKit_JS;
  return RetCC_AArch64_AAPCS;
}
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsWin64 = Subtarget>isCallingConvWin64(MF.getFunction().getCallingConv());
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex()  CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg>getType(),
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1  ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
assert(ArgLocs.size() == Ins.size());
SmallVector<SDValue, 16> ArgValues;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// noncompliant manner for larger structs.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
unsigned NumRegs = (Size + 7) / 8;
// FIXME: This works on bigendian for composite byvals, which are the common
// case. It should also work for fundamental types too.
unsigned FrameIdx =
MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
}
SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
else if (RegVT == MVT::f16)
RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
else if (RegVT == MVT::f64  RegVT.is64BitVector())
RC = &AArch64::FPR64RegClass;
else if (RegVT == MVT::f128  RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
else if (RegVT.isScalableVector() &&
RegVT.getVectorElementType() == MVT::i1)
RC = &AArch64::PPRRegClass;
else if (RegVT.isScalableVector())
RC = &AArch64::ZPRRegClass;
else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8, 16 or 32bit value, it is really passed promoted
// to 64 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
llvm_unreachable("Spilling of SVE vectors not yet implemented");
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
break;
case CCValAssign::AExtUpper:
ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
DAG.getConstant(32, DL, RegVT));
ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
uint32_t BEAlign = 0;
if (!Subtarget>isLittleEndian() && ArgSize < 8 &&
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8  ArgSize;
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
llvm_unreachable("Spilling of SVE vectors not yet implemented");
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue = DAG.getExtLoad(
ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MemVT);