Diff 145169

lib/Target/AArch64/AArch64InstrFormats.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 5,973 Lines • ▼ Show 20 Lines
multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm,		multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<3> opc, string asm,
SDPatternOperator OpNode = null_frag> {		SDPatternOperator OpNode = null_frag> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {		let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,		def #NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;		[(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,		def #NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;		[(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;
let Predicates = [HasNEON, HasFullFP16] in {		let Predicates = [HasNEON, HasFullFP16] in {
def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,		def #NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm, []>;
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Remove comments SjoerdMeijer: Remove comments
		SjoerdMeijerUnsubmitted Done Reply Inline Actions No changes were made here, so can you keep the old formatting? SjoerdMeijer: No changes were made here, so can you keep the old formatting?
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Nit: trailing whitespace SjoerdMeijer: Nit: trailing whitespace
[]>;
} // Predicates = [HasNEON, HasFullFP16]		} // Predicates = [HasNEON, HasFullFP16]
}		}

def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),		def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
(!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;		(!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
}		}

class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,		class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
▲ Show 20 Lines • Show All 1,793 Lines • ▼ Show 20 Lines	class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
let Inst{22-16} = fixed_imm;		let Inst{22-16} = fixed_imm;
let Inst{15-11} = opc;		let Inst{15-11} = opc;
let Inst{10} = 1;		let Inst{10} = 1;
let Inst{9-5} = Rn;		let Inst{9-5} = Rn;
let Inst{4-0} = Rd;		let Inst{4-0} = Rd;
}		}


multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {		multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {

		SjoerdMeijerUnsubmitted Done Reply Inline Actions nit: trailing whitespace? SjoerdMeijer: nit: trailing whitespace?
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Do we use OpNode? SjoerdMeijer: Do we use OpNode?
		LukeGeesonAuthorUnsubmitted Done Reply Inline Actions builds and tests fine without it - removed LukeGeeson: builds and tests fine without it - removed
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Nit: unnecessary new line. SjoerdMeijer: Nit: unnecessary new line.
let Predicates = [HasNEON, HasFullFP16] in {		let Predicates = [HasNEON, HasFullFP16] in {
def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},		def SHr : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
FPR16, FPR16, vecshiftR16, asm, []> {		FPR32, FPR16, vecshiftR32, asm, []> {
let Inst{19-16} = imm{3-0};		let Inst{19-16} = imm{3-0};
		SjoerdMeijerUnsubmitted Done Reply Inline Actions nit: I don't think you should break up the lines like this. SjoerdMeijer: nit: I don't think you should break up the lines like this.
		LukeGeesonAuthorUnsubmitted Done Reply Inline Actions Would you remove line 7803 too? looks better for separation LukeGeeson: Would you remove line 7803 too? looks better for separation
		let Inst{23-22} = 0b11;
}		}

} // Predicates = [HasNEON, HasFullFP16]		} // Predicates = [HasNEON, HasFullFP16]

		def HDr : BaseSIMDScalarShift<U, opc, {?,?,?,?,?,?,?},
		FPR16, FPR64, vecshiftR16, asm, []> {
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Nit1: spaces are off: FPR16 should be aligned under U. Nit2: add a space between ">" and "{". Same for other rules below. SjoerdMeijer: Nit1: spaces are off: FPR16 should be aligned under U. Nit2: add a space between ">" and "{". Same for other…
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Does this need to be vecshiftR32 and thus accept values [1,32]? If that's the case, we also need to update the tests. SjoerdMeijer: Does this need to be vecshiftR32 and thus accept values [1,32]? If that's the case, we also…
		let Inst{21-16} = imm{5-0};
		let Inst{23-22} = 0b11;
		}

		def DHr : BaseSIMDScalarShift<U, opc, {?,?,?,?,?,?,?},
		FPR64, FPR16, vecshiftR64, asm, []> {
		let Inst{21-16} = imm{5-0};
		let Inst{23-22} = 0b11;
		let Inst{31} = 1;
		}

def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},		def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
FPR32, FPR32, vecshiftR32, asm, []> {		FPR32, FPR32, vecshiftR32, asm, []> {
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Do the HDr and DHr patterns need to be guarded by predicates [HasNEON, HasFullFP16]? Can you check if all predicates are correctly set here? Also, it looks like we can simplify things here a bit: merge all patterns with the same neon and fullfp16 predicates in one block. SjoerdMeijer: Do the HDr and DHr patterns need to be guarded by predicates [HasNEON, HasFullFP16]? Can you…
let Inst{20-16} = imm{4-0};		let Inst{20-16} = imm{4-0};
}		}

def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},		def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
FPR64, FPR64, vecshiftR64, asm, []> {		FPR64, FPR64, vecshiftR16, asm, []> {
		SjoerdMeijerUnsubmitted Done Reply Inline Actions We don't need to change this? SjoerdMeijer: We don't need to change this?
let Inst{21-16} = imm{5-0};		let Inst{21-16} = imm{5-0};
}		}

		let Predicates = [HasNEON, HasFullFP16] in {
		def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
		SjoerdMeijerUnsubmitted Done Reply Inline Actions To keep the changes minimal, can you please move the "def h" back up where it was? SjoerdMeijer: To keep the changes minimal, can you please move the "def h" back up where it was?
		FPR16, FPR32, vecshiftR16, asm, []> {
		let Inst{19-16} = imm{3-0};
		}
		} // Predicates = [HasNEON, HasFullFP16]

}		}

multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm,		multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm,
SDPatternOperator OpNode> {		SDPatternOperator OpNode> {
def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},		def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
FPR64, FPR64, vecshiftR64, asm,		FPR64, FPR64, vecshiftR64, asm,
[(set (i64 FPR64:$Rd),		[(set (i64 FPR64:$Rd),
(OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> {		(OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> {
▲ Show 20 Lines • Show All 2,266 Lines • Show Last 20 Lines

lib/Target/AArch64/AArch64InstrInfo.td

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 4,863 Lines • ▼ Show 20 Lines	def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
(SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;		(SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;

//----------------------------------------------------------------------------		//----------------------------------------------------------------------------
// AdvSIMD scalar shift instructions		// AdvSIMD scalar shift instructions
//----------------------------------------------------------------------------		//----------------------------------------------------------------------------
defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;		defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;		defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;		defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;		defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Do we need to pass the intrinsics opnodes? SjoerdMeijer: Do we need to pass the intrinsics opnodes?
// Codegen patterns for the above. We don't put these directly on the		// Codegen patterns for the above. We don't put these directly on the
// instructions because TableGen's type inference can't handle the truth.		// instructions because TableGen's type inference can't handle the truth.
// Having the same base pattern for fp <--> int totally freaks it out.		// Having the same base pattern for fp <--> int totally freaks it out.
def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),		def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
(FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;		(FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),		def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
(FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;		(FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),		def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
		SjoerdMeijerUnsubmitted Done Reply Inline Actions Better not to introduce new line breaks here and also below. SjoerdMeijer: Better not to introduce new line breaks here and also below.
(FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;		(FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),		def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
(FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;		(FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),		def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
vecshiftR64:$imm)),		vecshiftR64:$imm)),
(FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;		(FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),		def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
vecshiftR64:$imm)),		vecshiftR64:$imm)),
(FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;		(FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),		def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
(SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;		(FCVTZSHDr (i64 FPR64:$Rn), vecshiftR16:$imm)>;
		def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu FPR16:$Rn, vecshiftR32:$imm)),
		(FCVTZUSHr FPR16:$Rn, vecshiftR32:$imm)>;
		def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs FPR16:$Rn, vecshiftR32:$imm)),
		(FCVTZSSHr FPR16:$Rn, vecshiftR32:$imm)>;
		def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
		(FCVTZSDHr (f16 FPR16:$Rn), vecshiftR64:$imm)>;
		def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
		(UCVTFh FPR32:$Rn, vecshiftR16:$imm)>;
def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),		def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
(UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;		(UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),		def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
(UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;		(UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),		def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
vecshiftR64:$imm)),		vecshiftR64:$imm)),
(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;		(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
		def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
		(SCVTFh FPR32:$Rn, vecshiftR16:$imm)>;
		def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR16:$imm)),
		(SCVTFh FPR32:$Rn, vecshiftR16:$imm)>;
		def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
		(SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),		def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
vecshiftR64:$imm)),		vecshiftR64:$imm)),
(UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;		(UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;

defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;		defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;		defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",		defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
int_aarch64_neon_sqrshrn>;		int_aarch64_neon_sqrshrn>;
▲ Show 20 Lines • Show All 1,403 Lines • Show Last 20 Lines

test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll

	; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 \| FileCheck %s			; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 \| FileCheck %s

	declare half @llvm.aarch64.sisd.fabd.f16(half, half)			declare half @llvm.aarch64.sisd.fabd.f16(half, half)
	declare half @llvm.aarch64.neon.fmax.f16(half, half)			declare half @llvm.aarch64.neon.fmax.f16(half, half)
	declare half @llvm.aarch64.neon.fmin.f16(half, half)			declare half @llvm.aarch64.neon.fmin.f16(half, half)
	declare half @llvm.aarch64.neon.frsqrts.f16(half, half)			declare half @llvm.aarch64.neon.frsqrts.f16(half, half)
	declare half @llvm.aarch64.neon.frecps.f16(half, half)			declare half @llvm.aarch64.neon.frecps.f16(half, half)
	declare half @llvm.aarch64.neon.fmulx.f16(half, half)			declare half @llvm.aarch64.neon.fmulx.f16(half, half)
	declare half @llvm.fabs.f16(half)			declare half @llvm.fabs.f16(half)

	define dso_local half @t_vabdh_f16(half %a, half %b) {			define dso_local half @t_vabdh_f16(half %a, half %b) {
	; CHECK-LABEL: t_vabdh_f16:			; CHECK-LABEL: t_vabdh_f16:
	; CHECK: fabd h0, h0, h1			; CHECK: fabd h0, h0, h1
				SjoerdMeijerUnsubmitted Done Reply Inline Actions Please don't modify the existing test cases. There is no need to check for the %bb.0 stuff as it doesn't add any value. SjoerdMeijer: Please don't modify the existing test cases. There is no need to check for the %bb.0 stuff as…
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	entry:			entry:
	%vabdh_f16 = tail call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b)			%vabdh_f16 = tail call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b)
	ret half %vabdh_f16			ret half %vabdh_f16
	}			}

	define dso_local half @t_vabdh_f16_from_fsub_fabs(half %a, half %b) {			define dso_local half @t_vabdh_f16_from_fsub_fabs(half %a, half %b) {
	; CHECK-LABEL: t_vabdh_f16_from_fsub_fabs:			; CHECK-LABEL: t_vabdh_f16_from_fsub_fabs:
	▲ Show 20 Lines • Show All 99 Lines • ▼ Show 20 Lines
	define dso_local half @t_vrsqrtsh_f16(half %a, half %b) {			define dso_local half @t_vrsqrtsh_f16(half %a, half %b) {
	; CHECK-LABEL: t_vrsqrtsh_f16:			; CHECK-LABEL: t_vrsqrtsh_f16:
	; CHECK: frsqrts h0, h0, h1			; CHECK: frsqrts h0, h0, h1
	; CHECK-NEXT: ret			; CHECK-NEXT: ret
	entry:			entry:
	%vrsqrtsh_f16 = tail call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b)			%vrsqrtsh_f16 = tail call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b)
	ret half %vrsqrtsh_f16			ret half %vrsqrtsh_f16
	}			}

				declare half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32, i32) #1
				declare half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64, i32) #1
				SjoerdMeijerUnsubmitted Done Reply Inline Actions You should add CHECK lines for all these test cases (see examples above). SjoerdMeijer: You should add CHECK lines for all these test cases (see examples above).
				declare i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half, i32) #1
				declare i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half, i32) #1
				SjoerdMeijerUnsubmitted Done Reply Inline Actions Please remove this line here and similar lines in the test cases below. SjoerdMeijer: Please remove this line here and similar lines in the test cases below.
				declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1
				SjoerdMeijerUnsubmitted Done Reply Inline Actions You should create a regexp for w8, and use it in the line below. Currently the test is very fragile, because if another register gets allocated this test starts failing. SjoerdMeijer: You should create a regexp for w8, and use it in the line below. Currently the test is very…
				declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1

				define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_s16_1:
				; CHECK: sxth w[[wReg:[0-9]+]], w0
				; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
				; CHECK-NEXT: scvtf h0, s0, #1
				; CHECK-NEXT: ret
				entry:
				%sext = sext i16 %a to i32
				%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1)
				ret half %fcvth_n
				}

				define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_s16_16:
				; CHECK: sxth w[[wReg:[0-9]+]], w0
				; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
				; CHECK-NEXT: scvtf h0, s0, #16
				; CHECK-NEXT: ret
				entry:
				%sext = sext i16 %a to i32
				SjoerdMeijerUnsubmitted Done Reply Inline Actions What is the supported range of constant 'n' for this intrinsic? If it is e.g. [1,16], I think it is best to test the minimum value 1, which is what we do here, but also the maximum value 16. Same comment for the other intrinsics here. SjoerdMeijer: What is the supported range of constant 'n' for this intrinsic? If it is e.g. [1,16], I think…
				%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 16)
				ret half %fcvth_n
				}

				define dso_local half @test_vcvth_n_f16_s32_1(i32 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_s32_1:
				; CHECK: fmov s0, w0
				; CHECK-NEXT: scvtf h0, s0, #1
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
				ret half %vcvth_n_f16_s32
				}

				define dso_local half @test_vcvth_n_f16_s32_16(i32 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_s32_16:
				; CHECK: fmov s0, w0
				; CHECK-NEXT: scvtf h0, s0, #16
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 16)
				ret half %vcvth_n_f16_s32
				}

				define dso_local half @test_vcvth_n_f16_s64_1(i64 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_s64_1:
				; CHECK: fmov d0, x0
				; CHECK-NEXT: fcvtzs h0, d0, #1
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1)
				ret half %vcvth_n_f16_s64
				}

				define dso_local half @test_vcvth_n_f16_s64_16(i64 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_s64_16:
				; CHECK: fmov d0, x0
				; CHECK-NEXT: fcvtzs h0, d0, #16
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 16)
				ret half %vcvth_n_f16_s64
				}

				lebedev.riUnsubmitted Done Reply Inline Actions Is it intentional that there are no `; CHECK` lines? lebedev.ri: Is it intentional that there are no `; CHECK` lines?
				LukeGeesonAuthorUnsubmitted Done Reply Inline Actions missed this, will add thanks LukeGeeson: missed this, will add thanks
				define dso_local i16 @test_vcvth_n_s16_f16_1(half %a) {
				; CHECK-LABEL: test_vcvth_n_s16_f16_1:
				; CHECK: fcvtzs s0, h0, #1
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				entry:
				%fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
				%0 = trunc i32 %fcvth_n to i16
				ret i16 %0
				}

				define dso_local i16 @test_vcvth_n_s16_f16_16(half %a) {
				; CHECK-LABEL: test_vcvth_n_s16_f16_16:
				; CHECK: fcvtzs s0, h0, #16
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				entry:
				%fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16)
				%0 = trunc i32 %fcvth_n to i16
				ret i16 %0
				SjoerdMeijerUnsubmitted Done Reply Inline Actions Please remove this line. SjoerdMeijer: Please remove this line.
				}

				define dso_local i32 @test_vcvth_n_s32_f16_1(half %a) {
				; CHECK-LABEL: test_vcvth_n_s32_f16_1:
				; CHECK: fcvtzs s0, h0, #1
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
				ret i32 %vcvth_n_s32_f16
				}

				define dso_local i32 @test_vcvth_n_s32_f16_16(half %a) {
				; CHECK-LABEL: test_vcvth_n_s32_f16_16:
				; CHECK: fcvtzs s0, h0, #16
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16)
				ret i32 %vcvth_n_s32_f16
				}

				define dso_local i64 @test_vcvth_n_s64_f16_1(half %a) {
				; CHECK-LABEL: test_vcvth_n_s64_f16_1:
				; CHECK: fcvtzs d0, h0, #1
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 1)
				ret i64 %vcvth_n_s64_f16
				}

				define dso_local i64 @test_vcvth_n_s64_f16_16(half %a) {
				; CHECK-LABEL: test_vcvth_n_s64_f16_16:
				; CHECK: fcvtzs d0, h0, #16
				; CHECK-NEXT: fmov x0, d0
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 16)
				ret i64 %vcvth_n_s64_f16
				}

				define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_u16_1:
				; CHECK: and w[[wReg:[0-9]+]], w0, #0xffff
				; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
				; CHECK-NEXT: ucvtf h0, s0, #1
				; CHECK-NEXT: ret
				entry:
				%0 = zext i16 %a to i32
				%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1)
				ret half %fcvth_n
				}

				define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_u16_16:
				; CHECK: and w[[wReg:[0-9]+]], w0, #0xffff
				; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
				; CHECK-NEXT: ucvtf h0, s0, #16
				; CHECK-NEXT: ret
				entry:
				%0 = zext i16 %a to i32
				%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 16)
				ret half %fcvth_n
				}

				define dso_local half @test_vcvth_n_f16_u32_1(i32 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_u32_1:
				; CHECK: fmov s0, w0
				; CHECK-NEXT: ucvtf h0, s0, #1
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
				ret half %vcvth_n_f16_u32
				}

				define dso_local half @test_vcvth_n_f16_u32_16(i32 %a) {
				; CHECK-LABEL: test_vcvth_n_f16_u32_16:
				; CHECK: fmov s0, w0
				; CHECK-NEXT: ucvtf h0, s0, #16
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 16)
				ret half %vcvth_n_f16_u32
				}

				define dso_local i16 @test_vcvth_n_u16_f16_1(half %a) {
				; CHECK-LABEL: test_vcvth_n_u16_f16_1:
				; CHECK: fcvtzu s0, h0, #1
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				entry:
				%fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
				%0 = trunc i32 %fcvth_n to i16
				ret i16 %0
				}

				define dso_local i16 @test_vcvth_n_u16_f16_16(half %a) {
				; CHECK-LABEL: test_vcvth_n_u16_f16_16:
				; CHECK: fcvtzu s0, h0, #16
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				entry:
				%fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
				%0 = trunc i32 %fcvth_n to i16
				ret i16 %0
				}

				define dso_local i32 @test_vcvth_n_u32_f16_1(half %a) {
				; CHECK-LABEL: test_vcvth_n_u32_f16_1:
				; CHECK: fcvtzu s0, h0, #1
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
				ret i32 %vcvth_n_u32_f16
				}

				define dso_local i32 @test_vcvth_n_u32_f16_16(half %a) {
				; CHECK-LABEL: test_vcvth_n_u32_f16_16:
				; CHECK: fcvtzu s0, h0, #16
				; CHECK-NEXT: fmov w0, s0
				; CHECK-NEXT: ret
				entry:
				%vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
				ret i32 %vcvth_n_u32_f16
				}

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] added FP16 vcvth intrinsic support
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 145169

lib/Target/AArch64/AArch64InstrFormats.td

lib/Target/AArch64/AArch64InstrInfo.td

test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] added FP16 vcvth intrinsic support
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 145169

lib/Target/AArch64/AArch64InstrFormats.td

lib/Target/AArch64/AArch64InstrInfo.td

test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll

[AArch64] added FP16 vcvth intrinsic support
ClosedPublic