AMDGPU: extend the unsigned-byte-to-float DAG combines.

Registers ISD::SINT_TO_FP as a target DAG combine and dispatches it to
performUCharToFloatCombine together with ISD::UINT_TO_FP.

In the CVT_F32_UBYTEn combine, looks through (zero_extend (srl x, c)) and
zero-extends or truncates the shifted value to i32 before building the
replacement node. The extended value is only created once the byte offset
has been validated, so no unused node is built when the fold is rejected.

Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -233,6 +233,7 @@
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::XOR);
+  setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine(ISD::FCANONICALIZE);
 
@@ -3521,6 +3522,11 @@
   case AMDGPUISD::CVT_F32_UBYTE3: {
     unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
     SDValue Src = N->getOperand(0);
+    // Look through a zero-extend of a right shift so the srl folds below
+    // also apply when the shift was done on a narrower value.
+    if (Src.getOpcode() == ISD::ZERO_EXTEND &&
+        Src.getOperand(0).getOpcode() == ISD::SRL)
+      Src = Src.getOperand(0);
 
     // TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero.
     if (Src.getOpcode() == ISD::SRL) {
@@ -3529,10 +3535,15 @@
       // cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
 
       if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
         unsigned SrcOffset = C->getZExtValue() + 8 * Offset;
         if (SrcOffset < 32 && SrcOffset % 8 == 0) {
+          // The shifted value may not be i32 once the zero-extend has been
+          // looked through; bring it to i32 only when the fold applies.
+          SDValue SrcZExt = DAG.getZExtOrTrunc(Src.getOperand(0),
+                                               SDLoc(Src.getOperand(0)),
+                                               EVT(MVT::i32));
           return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, DL,
-                             MVT::f32, Src.getOperand(0));
+                             MVT::f32, SrcZExt);
         }
       }
     }
@@ -3550,7 +3561,7 @@
 
     break;
   }
-
+  case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP: {
     return performUCharToFloatCombine(N, DCI);
   }