Index: lib/Target/R600/SIISelLowering.cpp =================================================================== --- lib/Target/R600/SIISelLowering.cpp +++ lib/Target/R600/SIISelLowering.cpp @@ -610,14 +610,19 @@ case AMDGPU::BRANCH: return BB; case AMDGPU::V_SUB_F64: { unsigned DestReg = MI->getOperand(0).getReg(); - BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg) + MachineInstr *NewMI = + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg) .addImm(0) // SRC0 modifiers - .addReg(MI->getOperand(1).getReg()) + .addOperand(MI->getOperand(1)) .addImm(1) // SRC1 modifiers - .addReg(MI->getOperand(2).getReg()) + .addOperand(MI->getOperand(2)) .addImm(0) // CLAMP .addImm(0); // OMOD MI->eraseFromParent(); + + // This is surprisingly called after AdjustInstrPostInstrSelection, so + // legalize the operands now in case we would need to do that. + TII->legalizeOperands(NewMI); break; } case AMDGPU::SI_RegisterStorePseudo: { Index: lib/Target/R600/SIInstructions.td =================================================================== --- lib/Target/R600/SIInstructions.td +++ lib/Target/R600/SIInstructions.td @@ -1939,7 +1939,7 @@ def V_SUB_F64 : InstSI < (outs VReg_64:$dst), - (ins VReg_64:$src0, VReg_64:$src1), + (ins VSrc_64:$src0, VSrc_64:$src1), "v_sub_f64 $dst, $src0, $src1", [(set f64:$dst, (fsub f64:$src0, f64:$src1))] >; Index: test/CodeGen/R600/fneg-fabs.f64.ll =================================================================== --- test/CodeGen/R600/fneg-fabs.f64.ll +++ test/CodeGen/R600/fneg-fabs.f64.ll @@ -6,7 +6,7 @@ ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64: ; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff ; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]] -; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}} define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) { %fabs = call double @llvm.fabs.f64(double %x) %fsub = fsub double -0.000000e+00, %fabs Index: test/CodeGen/R600/fneg.f64.ll =================================================================== --- test/CodeGen/R600/fneg.f64.ll +++ test/CodeGen/R600/fneg.f64.ll @@ -38,8 +38,7 @@ ; unless the target returns true for isNegFree() ; FUNC-LABEL: {{^}}fneg_free_f64: -; FIXME: Unnecessary copy to VGPRs -; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, 0, -{{s\[[0-9]+:[0-9]+\]$}} define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) { %bc = bitcast i64 %in to double %fsub = fsub double 0.0, %bc Index: test/CodeGen/R600/fsub64.ll =================================================================== --- test/CodeGen/R600/fsub64.ll +++ test/CodeGen/R600/fsub64.ll @@ -4,9 +4,75 @@ ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) { - %r0 = load double addrspace(1)* %in1 - %r1 = load double addrspace(1)* %in2 - %r2 = fsub double %r0, %r1 - store double %r2, double addrspace(1)* %out - ret void + %r0 = load double addrspace(1)* %in1 + %r1 = load double addrspace(1)* %in2 + %r2 = fsub double %r0, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double %a, %b + store double %sub, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_imm_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], 4.0, -s\[[0-9]+:[0-9]+\]}} +define void @s_fsub_imm_f64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double 4.0, %a + store double %sub, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_imm_inv_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -4.0, s\[[0-9]+:[0-9]+\]}} +define void @s_fsub_imm_inv_f64(double addrspace(1)* %out, double %a, double %b) { + %sub = fsub double %a, 4.0 + store double %sub, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_self_f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -s\[[0-9]+:[0-9]+\]}} +define void @s_fsub_self_f64(double addrspace(1)* %out, double %a) { + %sub = fsub double %a, %a + store double %sub, double addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}fsub_v2f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +define void @fsub_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) { + %sub = fsub <2 x double> %a, %b + store <2 x double> %sub, <2 x double> addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}fsub_v4f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) { + %b_ptr = getelementptr <4 x double> addrspace(1)* %in, i32 1 + %a = load <4 x double> addrspace(1)* %in + %b = load <4 x double> addrspace(1)* %b_ptr + %result = fsub <4 x double> %a, %b + store <4 x double> %result, <4 x double> addrspace(1)* %out + ret void +} + +; SI-LABEL: {{^}}s_fsub_v4f64: +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}} +define void @s_fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) { + %result = fsub <4 x double> %a, %b + store <4 x double> %result, <4 x double> addrspace(1)* %out, align 16 + ret void }