# Patch summary (AMDGPU backend):
#   * SIDefines.h: adds the operand-type enumerator OPERAND_REG_DEF16 after
#     OPERAND_KIMM16.
#   * SIInstrFormats.td: adds VOPDstOperand16, whose OperandType is
#     "OPERAND_REG_DEF16" in OperandNamespace "AMDGPU", plus the named defs
#     VOPDstOperand32, VOPDstOperand64 and VOPDstOperand128.
#   * SIInstrInfo.cpp: the switch's default case now asserts that operand 0 of
#     Def has the same register and sub-register as Op, and sets
#     KnownZero = 0xffff0000 when operand 0's OperandType is
#     AMDGPU::OPERAND_REG_DEF16.
#   * SIInstrInfo.td: the VT.Size == 16 case selects VOPDstOperand16.
#   * VOP3Instructions.td: Outs64 uses DstRC:$vdst instead of
#     DstRC.RegClass:$vdst in three places.
#
# NOTE(review): this copy of the patch looks damaged. Template arguments in
# angle brackets seem to have been stripped (see `class VINTRPe op> : Enc32`
# and `class VOPDstOperand : RegisterOperand ;`), and blank context lines and
# original indentation were lost, so hunk line counts may not match the lines
# shown. Recover the original patch before trying to apply it.
Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -120,7 +120,10 @@
     /// Operand with 32-bit immediate that uses the constant bus.
     OPERAND_KIMM32,
-    OPERAND_KIMM16
+    OPERAND_KIMM16,
+
+    /// Output register operand with 16-bit output.
+    OPERAND_REG_DEF16
   };
 }
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -178,6 +178,16 @@
 class VOPDstOperand : RegisterOperand ;
+let OperandNamespace = "AMDGPU" in {
+def VOPDstOperand16 : VOPDstOperand {
+  let OperandType = "OPERAND_REG_DEF16";
+}
+}
+
+def VOPDstOperand32 : VOPDstOperand;
+def VOPDstOperand64 : VOPDstOperand;
+def VOPDstOperand128 : VOPDstOperand;
+
 class VINTRPe op> : Enc32 {
   bits<8> vdst;
   bits<8> vsrc;
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4082,7 +4082,21 @@
     KnownZero = 0xffff0000;
     return;
   }
-  default:
+  default: {
+    // XXX - Are there any 16-bit output instructions with second defs?
+    // FIXME: Assumes operand 0 of Def is the def of Op; other defs assert.
+    assert(Def->getOperand(0).getReg() == Op.getReg() &&
+           Def->getOperand(0).getSubReg() == Op.getSubReg());
+    if (Def->getDesc().OpInfo[0].OperandType == AMDGPU::OPERAND_REG_DEF16) {
+      // FIXME: This isn't true for all instructions on gfx9, where some new
+      // instructions default to leaving high bits intact and there is a control
+      // bit for old instructions to change zeroing behavior.
+
+      KnownZero = 0xffff0000;
+      return;
+    }
+    return;
+  }
   }
 }
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -785,7 +785,7 @@
   RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand,
                           !if(!eq(VT.Size, 128), VOPDstOperand,
                             !if(!eq(VT.Size, 64), VOPDstOperand,
-                              !if(!eq(VT.Size, 16), VOPDstOperand,
+                              !if(!eq(VT.Size, 16), VOPDstOperand16,
                               VOPDstOperand)))); // else VT == i1
 }
Index: lib/Target/AMDGPU/VOP3Instructions.td
===================================================================
--- lib/Target/AMDGPU/VOP3Instructions.td
+++ lib/Target/AMDGPU/VOP3Instructions.td
@@ -67,10 +67,10 @@
 // only VOP instruction that implicitly reads VCC.
 let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
 def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> {
-  let Outs64 = (outs DstRC.RegClass:$vdst);
+  let Outs64 = (outs DstRC:$vdst);
 }
 def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
-  let Outs64 = (outs DstRC.RegClass:$vdst);
+  let Outs64 = (outs DstRC:$vdst);
 }
 }
@@ -85,7 +85,7 @@
 class VOP3_Profile : VOPProfile {
   // FIXME: Hack to stop printing _e64
-  let Outs64 = (outs DstRC.RegClass:$vdst);
+  let Outs64 = (outs DstRC:$vdst);
   let Asm64 = " " # P.Asm64;
 }