Diff 65538

include/llvm/IR/IntrinsicsAMDGPU.td

Context not available.
	GCCBuiltin<"__builtin_amdgcn_lerp">,	GCCBuiltin<"__builtin_amdgcn_lerp">,
	Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;	Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;

		def int_amdgcn_icmp :
		arsenmUnsubmitted Done Reply Inline Actions You should remove this comment arsenm: You should remove this comment
		Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
		[IntrNoMem, IntrConvergent]>;
		arsenmUnsubmitted Done Reply Inline Actions Remove the GCCBuiltins, they don't work with overloaded intrinsics arsenm: Remove the GCCBuiltins, they don't work with overloaded intrinsics

		def int_amdgcn_fcmp :
		Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
		arsenmUnsubmitted Done Reply Inline Actions And this one arsenm: And this one
		[IntrNoMem, IntrConvergent]>;

	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
		arsenmUnsubmitted Done Reply Inline Actions the 3rd parameter should be i32 arsenm: the 3rd parameter should be i32
		tstellarAMDUnsubmitted Done Reply Inline Actions Also, should the return type be i64 instead of double? tstellarAMD: Also, should the return type be i64 instead of double?
		wdngAuthorUnsubmitted Not Done Reply Inline Actions Yes, code has been changed accordingly. Thanks! wdng: Yes, code has been changed accordingly. Thanks!
	// CI+ Intrinsics	// CI+ Intrinsics
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
Context not available.

lib/Target/AMDGPU/AMDGPUISelLowering.h

Context not available.
	DWORDADDR,	DWORDADDR,
	FRACT,	FRACT,
	CLAMP,	CLAMP,
		// This is SETCC with the full mask result which is used for a compare with a
		arsenmUnsubmitted Done Reply Inline Actions Needs a comment that this is setcc with the full mask result arsenm: Needs a comment that this is setcc with the full mask result
		arsenmUnsubmitted Done Reply Inline Actions Should have space after the //, and it should be capitalized and punctuated. Maybe clearer would be a compare with a result bit per item in the wavefront or something, mask result sounds more ambiguous maybe arsenm: Should have space after the //, and it should be capitalized and punctuated. Maybe clearer…
		// result bit per item in the wavefront.
		SETCC,

	// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.	// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
	// Denormals handled on some parts.	// Denormals handled on some parts.
Context not available.

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Context not available.
	NODE_NAME_CASE(RETURN)	NODE_NAME_CASE(RETURN)
	NODE_NAME_CASE(DWORDADDR)	NODE_NAME_CASE(DWORDADDR)
	NODE_NAME_CASE(FRACT)	NODE_NAME_CASE(FRACT)
		NODE_NAME_CASE(SETCC)
	NODE_NAME_CASE(CLAMP)	NODE_NAME_CASE(CLAMP)
	NODE_NAME_CASE(COS_HW)	NODE_NAME_CASE(COS_HW)
	NODE_NAME_CASE(SIN_HW)	NODE_NAME_CASE(SIN_HW)
Context not available.

lib/Target/AMDGPU/AMDGPUInstrInfo.td

Context not available.
	// out = (src1 > src0) ? 1 : 0	// out = (src1 > src0) ? 1 : 0
	def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;	def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;

		def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
		SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
		]>;

		def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

	def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",	def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
	SDTIntToFPOp, []>;	SDTIntToFPOp, []>;
Context not available.

lib/Target/AMDGPU/SIISelLowering.cpp

Context not available.
	#include "llvm/CodeGen/MachineInstrBuilder.h"	#include "llvm/CodeGen/MachineInstrBuilder.h"
	#include "llvm/CodeGen/MachineRegisterInfo.h"	#include "llvm/CodeGen/MachineRegisterInfo.h"
	#include "llvm/CodeGen/SelectionDAG.h"	#include "llvm/CodeGen/SelectionDAG.h"
		#include "llvm/CodeGen/Analysis.h"
	#include "llvm/IR/DiagnosticInfo.h"	#include "llvm/IR/DiagnosticInfo.h"
	#include "llvm/IR/Function.h"	#include "llvm/IR/Function.h"

		arsenmUnsubmitted Done Reply Inline Actions Variables should be capitalized and camel case. What happens if the cond code is out of range? There should probably be a clamp arsenm: Variables should be capitalized and camel case. What happens if the cond code is out of range?
		arsenmUnsubmitted Done Reply Inline Actions This looks like it goes over 80 characters arsenm: This looks like it goes over 80 characters
		arsenmUnsubmitted Done Reply Inline Actions Extra spaces between type and name arsenm: Extra spaces between type and name
		arsenmUnsubmitted Done Reply Inline Actions These should be put towards the end of the cases arsenm: These should be put towards the end of the cases
Context not available.
	return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,	return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
	Denominator, Numerator);	Denominator, Numerator);
	}	}
		case Intrinsic::amdgcn_icmp: {
		const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
		int CondCode= CD->getSExtValue();
		arsenmUnsubmitted Done Reply Inline Actions Space before = arsenm: Space before =

		if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
		arsenmUnsubmitted Done Reply Inline Actions Instead of an assert, how about returning undef? this should also have a test. Same if the operand isn't really constant, you'll need to do the dyn_cast yourself arsenm: Instead of an assert, how about returning undef? this should also have a test. Same if the…
		CondCode >= ICmpInst::Predicate::BAD_ICMP_PREDICATE)
		arsenmUnsubmitted Done Reply Inline Actions You should do the range check before the static_cast since I think it is undefined behavior to have an out of bounds enum value inserted. This also won't work for fcmp, each should be handled in its own case with its own range check for the specific compare types' range arsenm: You should do the range check before the static_cast since I think it is undefined behavior to…
		return DAG.getUNDEF(VT);

		ICmpInst::Predicate IcInput =
		static_cast<ICmpInst::Predicate>(CondCode);
		ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
		return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
		Op.getOperand(2), DAG.getCondCode(CCOpcode));
		}
		case Intrinsic::amdgcn_fcmp: {
		const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
		int CondCode= CD->getSExtValue();
		arsenmUnsubmitted Done Reply Inline Actions Ditto arsenm: Ditto

		arsenmUnsubmitted Done Reply Inline Actions Should refer to FCmpInst arsenm: Should refer to FCmpInst
		if (CondCode <= FCmpInst::Predicate::FCMP_FALSE ||
		CondCode >= FCmpInst::Predicate::FCMP_TRUE)
		return DAG.getUNDEF(VT);

		FCmpInst::Predicate IcInput =
		static_cast<FCmpInst::Predicate>(CondCode);
		ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
		return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
		Op.getOperand(2), DAG.getCondCode(CCOpcode));
		}
	default:	default:
	return AMDGPUTargetLowering::LowerOperation(Op, DAG);	return AMDGPUTargetLowering::LowerOperation(Op, DAG);
	}	}
Context not available.

lib/Target/AMDGPU/SIInstructions.td

Context not available.
	>;	>;

	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//
		// V_ICMPIntrinsic Pattern.
		//===----------------------------------------------------------------------===//
		class ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : Pat <
		(AMDGPUsetcc vt:$src0, vt:$src1, cond),
		(inst $src0, $src1)
		>;
		arsenmUnsubmitted Done Reply Inline Actions This can just be a class. You can also try adding the pattern dag to the v_cmp instruction definition patterns list (although I'm not 100% sure if the multiple patterns actually work). A multiclass might help if you don't want to repeat for i32/i64 arsenm: This can just be a class. You can also try adding the pattern dag to the v_cmp instruction…

		def : ICMP_Pattern <COND_EQ, V_CMP_EQ_I32_e64, i32>;
		def : ICMP_Pattern <COND_NE, V_CMP_NE_I32_e64, i32>;
		def : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
		def : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
		def : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
		def : ICMP_Pattern <COND_ULE, V_CMP_LE_U32_e64, i32>;
		arsenmUnsubmitted Done Reply Inline Actions All compare types should be defined. Additionally i64 and the FP ones are missing arsenm: All compare types should be defined. Additionally i64 and the FP ones are missing
		def : ICMP_Pattern <COND_SGT, V_CMP_GT_I32_e64, i32>;
		def : ICMP_Pattern <COND_SGE, V_CMP_GE_I32_e64, i32>;
		arsenmUnsubmitted Done Reply Inline Actions The unsigned should use the _U32 compare arsenm: The unsigned should use the _U32 compare
		def : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
		def : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;

		def : ICMP_Pattern <COND_EQ, V_CMP_EQ_I64_e64, i64>;
		def : ICMP_Pattern <COND_NE, V_CMP_NE_I64_e64, i64>;
		def : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
		def : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
		def : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
		def : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;
		arsenmUnsubmitted Done Reply Inline Actions Ditto arsenm: Ditto
		def : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;
		def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
		arsenmUnsubmitted Done Reply Inline Actions Ditto arsenm: Ditto
		def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
		def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;

		class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : Pat <
		(i64(AMDGPUsetcc (vt(VOP3Mods vt:$src0, i32:$src0_modifiers)),
		(vt(VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
		arsenmUnsubmitted Not Done Reply Inline Actions Spaces before the types and the next ( arsenm: Spaces before the types and the next (
		(inst $src0_modifiers, $src0, $src1_modifiers, $src1,
		DSTCLAMP.NONE, DSTOMOD.NONE)
		>;

		def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
		def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
		def : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
		def : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
		def : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
		def : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;

		def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
		def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
		def : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
		def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
		def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
		def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;

		def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
		arsenmUnsubmitted Done Reply Inline Actions This also needs to be done for the unordered compares arsenm: This also needs to be done for the unordered compares
		def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
		def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
		def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;
		def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;
		def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;

		def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;
		def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;
		def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;
		def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
		def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
		def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;

		//===----------------------------------------------------------------------===//
		arsenmUnsubmitted Done Reply Inline Actions These are not the correct unordered comparison instructions, refer to the existing set of fcmp patterns for which to use arsenm: These are not the correct unordered comparison instructions, refer to the existing set of fcmp…
		tstellarAMDUnsubmitted Done Reply Inline Actions Unordered compares should select the V_CMP_N* instructions. Take a look at the instruction definitions to see which condition matches to which instruction. tstellarAMD: Unordered compares should select the V_CMP_N* instructions. Take a look at the instruction…
	// SMRD Patterns	// SMRD Patterns
	//===----------------------------------------------------------------------===//	//===----------------------------------------------------------------------===//

Context not available.

test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll

This file was added.

				; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
				; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

				declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
				declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
				declare float @llvm.fabs.f32(float) nounwind readnone
				arsenmUnsubmitted Not Done Reply Inline Actions Use the attribute group arsenm: Use the attribute group

				; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
				arsenmUnsubmitted Not Done Reply Inline Actions Missing test for invalid condition code value arsenm: Missing test for invalid condition code value
				; GCN: v_cmp_eq_f32_e64
				define void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) #1 {
				arsenmUnsubmitted Done Reply Inline Actions You can move the \|s outside of the regex and then you don't have to escape them arsenm: You can move the \|s outside of the regex and then you don't have to escape them
				%temp = call float @llvm.fabs.f32(float %a) nounwind readnone
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
				arsenmUnsubmitted Done Reply Inline Actions Don't need call site attributes arsenm: Don't need call site attributes
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}
				arsenmUnsubmitted Done Reply Inline Actions The call site does not need the attribute specified. Can you also test the other operand? The check line should check the actual operands, this currently does not actually check much arsenm: The call site does not need the attribute specified. Can you also test the other operand? The…

				; GCN-LABEL: {{^}}v_fcmp_f32_oeq:
				arsenmUnsubmitted Done Reply Inline Actions Still should test that both operands can have the source modifiers folded arsenm: Still should test that both operands can have the source modifiers folded
				; GCN: v_cmp_eq_f32_e64
				define void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_one:
				; GCN: v_cmp_neq_f32_e64
				define void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
				store i64 %result, i64 addrspace(1)* %out, align 4
				arsenmUnsubmitted Done Reply Inline Actions You should drop the suffix here to strengthen the test. It would be best to reduce to just v_cmp because something could commute the instruction arsenm: You should drop the suffix here to strengthen the test. It would be best to reduce to just…
				ret void
				arsenmUnsubmitted Done Reply Inline Actions it doesn't really matter, but there's no reason this test needs to under-align the stores, Fix these to be align 8 or remove the aligns arsenm: it doesn't really matter, but there's no reason this test needs to under-align the stores, Fix…
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_ogt:
				; GCN: v_cmp_gt_f32_e64
				define void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_oge:
				; GCN: v_cmp_ge_f32_e64
				define void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_olt:
				; GCN: v_cmp_lt_f32_e64
				define void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_ole:
				; GCN: v_cmp_le_f32_e64
				define void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}


				; GCN-LABEL: {{^}}v_fcmp_f32_ueq:
				; GCN: v_cmp_nlg_f32_e64
				define void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_une:
				; GCN: v_cmp_neq_f32_e64
				define void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_ugt:
				; GCN: v_cmp_nle_f32_e64
				define void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_uge:
				; GCN: v_cmp_nlt_f32_e64
				define void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f32_ult:
				; GCN: v_cmp_nge_f32_e64
				define void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				arsenmUnsubmitted Done Reply Inline Actions Missing unordered compares arsenm: Missing unordered compares
				; GCN-LABEL: {{^}}v_fcmp_f32_ule:
				; GCN: v_cmp_ngt_f32_e64
				define void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_oeq:
				; GCN: v_cmp_eq_f64_e64
				define void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_one:
				; GCN: v_cmp_neq_f64_e64
				define void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_ogt:
				; GCN: v_cmp_gt_f64_e64
				define void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_oge:
				; GCN: v_cmp_ge_f64_e64
				define void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_olt:
				; GCN: v_cmp_lt_f64_e64
				define void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_ole:
				; GCN: v_cmp_le_f64_e64
				define void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_ueq:
				; GCN: v_cmp_nlg_f64_e64
				define void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_une:
				; GCN: v_cmp_neq_f64_e64
				define void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_ugt:
				; GCN: v_cmp_nle_f64_e64
				define void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_uge:
				; GCN: v_cmp_nlt_f64_e64
				define void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_fcmp_f64_ult:
				; GCN: v_cmp_nge_f64_e64
				define void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}
				arsenmUnsubmitted Not Done Reply Inline Actions You should also add a test that uses fabs on the inputs to make sure that source modifiers are folded arsenm: You should also add a test that uses fabs on the inputs to make sure that source modifiers are…
				arsenmUnsubmitted Not Done Reply Inline Actions Still missing these tests arsenm: Still missing these tests
				wdngAuthorUnsubmitted Not Done Reply Inline Actions I have just created one "define void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) #1" and put it on the top of tests. Should I write fabs tests for all fcmp comparisons? wdng: I have just created one "define void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float…

				; GCN-LABEL: {{^}}v_fcmp_f64_ule:
				; GCN: v_cmp_ngt_f64_e64
				define void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) #1 {
				%result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				attributes #0 = { nounwind readnone convergent }
				attributes #1 = { nounwind }

test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll

This file was added.

				; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
				; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

				declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
				declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0

				; GCN-LABEL: {{^}}v_icmp_i32_eq:
				arsenmUnsubmitted Done Reply Inline Actions Missing test for invalid condition code value arsenm: Missing test for invalid condition code value
				; GCN: v_cmp_eq_i32_e64
				define void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i32_ne:
				; GCN: v_cmp_ne_i32_e64
				define void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) #1 {
				arsenmUnsubmitted Done Reply Inline Actions Ditto arsenm: Ditto
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_u32_ugt:
				; GCN: v_cmp_gt_u32_e64
				define void @v_icmp_u32_ugt(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_u32_uge:
				; GCN: v_cmp_ge_u32_e64
				define void @v_icmp_u32_uge(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_u32_ult:
				; GCN: v_cmp_lt_u32_e64
				define void @v_icmp_u32_ult(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_u32_ule:
				; GCN: v_cmp_le_u32_e64
				define void @v_icmp_u32_ule(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i32_sgt:
				; GCN: v_cmp_gt_i32_e64
				define void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i32_sge:
				; GCN: v_cmp_ge_i32_e64
				define void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i32_slt:
				; GCN: v_cmp_lt_i32_e64
				define void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}
				; GCN-LABEL: {{^}}v_icmp_i32_sle:
				; GCN: v_cmp_le_i32_e64
				define void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i64_eq:
				; GCN: v_cmp_eq_i64_e64
				define void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i64_ne:
				; GCN: v_cmp_ne_i64_e64
				define void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_u64_ugt:
				; GCN: v_cmp_gt_u64_e64
				define void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_u64_uge:
				; GCN: v_cmp_ge_u64_e64
				define void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_u64_ult:
				; GCN: v_cmp_lt_u64_e64
				define void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_u64_ule:
				; GCN: v_cmp_le_u64_e64
				define void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i64_sgt:
				; GCN: v_cmp_gt_i64_e64
				define void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i64_sge:
				; GCN: v_cmp_ge_i64_e64
				define void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				; GCN-LABEL: {{^}}v_icmp_i64_slt:
				; GCN: v_cmp_lt_i64_e64
				define void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}
				; GCN-LABEL: {{^}}v_icmp_i64_sle:
				; GCN: v_cmp_le_i64_e64
				define void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) #1 {
				%result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41)
				store i64 %result, i64 addrspace(1)* %out, align 4
				ret void
				}

				attributes #0 = { nounwind readnone convergent }
				attributes #1 = { nounwind }

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU : Add intrinsics for compare with the full wavefront result, such as v_cmp_ne_i32, etc..
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 65538

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/AMDGPUISelLowering.h

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

lib/Target/AMDGPU/AMDGPUInstrInfo.td

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SIInstructions.td

test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll

test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll

This is an archive of the discontinued LLVM Phabricator instance.

AMDGPU : Add intrinsics for compare with the full wavefront result, such as v_cmp_ne_i32, etc..
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 65538

include/llvm/IR/IntrinsicsAMDGPU.td

lib/Target/AMDGPU/AMDGPUISelLowering.h

lib/Target/AMDGPU/AMDGPUISelLowering.cpp

lib/Target/AMDGPU/AMDGPUInstrInfo.td

lib/Target/AMDGPU/SIISelLowering.cpp

lib/Target/AMDGPU/SIInstructions.td

test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll

test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll

AMDGPU : Add intrinsics for compare with the full wavefront result, such as v_cmp_ne_i32, etc..
ClosedPublic