Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -757,7 +757,8 @@ void visitInlineAsm(const CallBase &Call); void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); - void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic, + SDNodeFlags Flags); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); void visitVAStart(const CallInst &I); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4602,7 +4602,8 @@ /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, - unsigned Intrinsic) { + unsigned Intrinsic, + SDNodeFlags Flags) { // Ignore the callsite's attributes. A specific call site may be marked with // readnone, but the lowering code will expect the chain based on the // definition. @@ -4679,6 +4680,7 @@ } else { Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); } + Result->setFlags(Flags); if (HasChain) { SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); @@ -5611,7 +5613,7 @@ switch (Intrinsic) { default: // By default, turn this into a target intrinsic node. - visitTargetIntrinsic(I, Intrinsic); + visitTargetIntrinsic(I, Intrinsic, Flags); return; case Intrinsic::vscale: { match(&I, m_VScale(DAG.getDataLayout())); Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6620,8 +6620,8 @@ case Intrinsic::amdgcn_ballot: return lowerBALLOTIntrinsic(*this, Op.getNode(), DAG); case Intrinsic::amdgcn_fmed3: - return DAG.getNode(AMDGPUISD::FMED3, DL, VT, - Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + return DAG.getNode(AMDGPUISD::FMED3, DL, VT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3), Op->getFlags()); case Intrinsic::amdgcn_fdot2: return DAG.getNode(AMDGPUISD::FDOT2, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), Index: llvm/test/CodeGen/AMDGPU/clamp.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/clamp.ll +++ llvm/test/CodeGen/AMDGPU/clamp.ll @@ -336,7 +336,7 @@ ; GCN-LABEL: {{^}}v_clamp_nnan_med3_ayb_f32: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] -; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0 +; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}} define amdgpu_kernel void @v_clamp_nnan_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid