Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2393,7 +2393,10 @@ unsigned Mods = 0; MachineInstr *MI = MRI.getVRegDef(Src); - if (MI && MI->getOpcode() == AMDGPU::G_FNEG) { + if (MI && MI->getOpcode() == AMDGPU::G_FNEG && + // It's possible to see an f32 fneg here, but unlikely. + // TODO: Treat f32 fneg as only high bit. + MRI.getType(Src) == LLT::vector(2, 16)) { Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); Src = MI->getOperand(1).getReg(); MI = MRI.getVRegDef(Src); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll @@ -101,15 +101,17 @@ ; GFX906-LABEL: v_fdot2_neg_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX906-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_fdot2_neg_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.c = fneg float %c %r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false) Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll @@ -276,21 +276,24 @@ ; GFX906-LABEL: v_sdot2_fnegf32_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX906-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: v_sdot2_fnegf32_c: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX908-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; GFX908-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_sdot2_fnegf32_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.c = fneg float %c %cast.neg.c = bitcast float %neg.c to i32 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll @@ -98,15 +98,17 @@ ; GFX906-LABEL: v_sdot4_fnegf32_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_dot4_i32_i8 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] +; GFX906-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 +; GFX906-NEXT: v_dot4_i32_i8 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_sdot4_fnegf32_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_dot4_i32_i8 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] +; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_dot4_i32_i8 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg float %a %cast.neg.a = bitcast float %neg.a to i32 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll @@ -51,15 +51,17 @@ ; GFX906-LABEL: v_sdot8_fnegf32_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] +; GFX906-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 +; GFX906-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_sdot8_fnegf32_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] +; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_dot8_i32_i4 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg float %a %cast.neg.a = bitcast float %neg.a to i32 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll @@ -276,21 +276,24 @@ ; GFX906-LABEL: v_udot2_fnegf32_c: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX906-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; GFX906-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: v_udot2_fnegf32_c: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX908-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 +; GFX908-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_udot2_fnegf32_c: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.c = fneg float %c %cast.neg.c = bitcast float %neg.c to i32 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll @@ -98,15 +98,17 @@ ; GFX906-LABEL: v_udot4_fnegf32_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] +; GFX906-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 +; GFX906-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_udot4_fnegf32_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] +; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_dot4_u32_u8 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg float %a %cast.neg.a = bitcast float %neg.a to i32 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll @@ -51,15 +51,17 @@ ; GFX906-LABEL: v_udot8_fnegf32_a: ; GFX906: ; %bb.0: ; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX906-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] +; GFX906-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 +; GFX906-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 ; GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_udot8_fnegf32_a: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0] +; GFX10-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 ; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_dot8_u32_u4 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] %neg.a = fneg float %a %cast.neg.a = bitcast float %neg.a to i32