Index: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -189,15 +189,24 @@ unsigned Val = Mod.getImm(); if ((Val & SISrcMods::OP_SEL_0) || !(Val & SISrcMods::OP_SEL_1)) return false; - // If upper part is all zero we do not need op_sel_hi. - if (!isUInt<16>(Fold.ImmToFold)) { - if (!(Fold.ImmToFold & 0xffff)) { - Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0); + // Only apply the following transformation if that operand requires + // a packed immediate. + switch (TII.get(Opcode).OpInfo[OpNo].OperandType) { + case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: + case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: + // If upper part is all zero we do not need op_sel_hi. + if (!isUInt<16>(Fold.ImmToFold)) { + if (!(Fold.ImmToFold & 0xffff)) { + Mod.setImm(Mod.getImm() | SISrcMods::OP_SEL_0); + Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); + Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff); + return true; + } Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); - Old.ChangeToImmediate((Fold.ImmToFold >> 16) & 0xffff); - return true; } - Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); + break; + default: + break; } } Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll @@ -33,3 +33,10 @@ store float %r.val, float addrspace(1)* %r ret void } + +; GFX906-LABEL: {{^}}fdot2_inline_literal +; GFX906: v_dot2_f32_f16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 1.0 +define float @fdot2_inline_literal(<2 x half> %a, <2 x half> %b) { + %ret = tail call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float 1.0, i1 false) + ret float %ret +}