Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -86,6 +86,10 @@
   unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;

 protected:
+  bool swapSourceModifiers(MachineInstr &MI,
+                           MachineOperand &Src0, unsigned Src0OpName,
+                           MachineOperand &Src1, unsigned Src1OpName) const;
+
   MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                        unsigned OpIdx0,
                                        unsigned OpIdx1) const override;
@@ -144,7 +148,12 @@
   unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

   LLVM_READONLY
-  int commuteOpcode(const MachineInstr &MI) const;
+  int commuteOpcode(unsigned Opc) const;
+
+  LLVM_READONLY
+  inline int commuteOpcode(const MachineInstr &MI) const {
+    return commuteOpcode(MI.getOpcode());
+  }

   bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
                              unsigned &SrcOpIdx2) const override;
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -497,9 +497,7 @@
   }
 }

-int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
-  const unsigned Opcode = MI.getOpcode();
-
+int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
   int NewOpc;

   // Try to map original to commuted opcode
@@ -908,91 +906,90 @@
   return true;
 }

-/// Commutes the operands in the given instruction.
-/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
-///
-/// Do not call this method for a non-commutable instruction or for
-/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
-/// Even though the instruction is commutable, the method may still
-/// fail to commute the operands, null pointer is returned in such cases.
-MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
-                                                  unsigned OpIdx0,
-                                                  unsigned OpIdx1) const {
-  int CommutedOpcode = commuteOpcode(MI);
-  if (CommutedOpcode == -1)
-    return nullptr;
+bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
+                                      MachineOperand &Src0,
+                                      unsigned Src0OpName,
+                                      MachineOperand &Src1,
+                                      unsigned Src1OpName) const {
+  MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
+  if (!Src0Mods)
+    return false;

-  int Src0Idx =
-      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
-  MachineOperand &Src0 = MI.getOperand(Src0Idx);
-  if (!Src0.isReg())
+  MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
+  assert(Src1Mods &&
+         "All commutable instructions have both src0 and src1 modifiers");
+
+  int Src0ModsVal = Src0Mods->getImm();
+  int Src1ModsVal = Src1Mods->getImm();
+
+  Src1Mods->setImm(Src0ModsVal);
+  Src0Mods->setImm(Src1ModsVal);
+  return true;
+}
+
+static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
+                                             MachineOperand &RegOp,
+                                             MachineOperand &ImmOp) {
+  // TODO: Handle other immediate like types.
+  if (!ImmOp.isImm())
     return nullptr;

-  int Src1Idx =
-      AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
+  int64_t ImmVal = ImmOp.getImm();
+  ImmOp.ChangeToRegister(RegOp.getReg(), false, false,
+                         RegOp.isKill(), RegOp.isDead(), RegOp.isUndef(),
+                         RegOp.isDebug());
+  ImmOp.setSubReg(RegOp.getSubReg());
+  RegOp.ChangeToImmediate(ImmVal);
+  return &MI;
+}

-  if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
-       OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
-      (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
-       OpIdx1 != static_cast<unsigned>(Src0Idx)))
+MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
+                                                  unsigned Src0Idx,
+                                                  unsigned Src1Idx) const {
+  assert(!NewMI && "this should never be used");
+
+  unsigned Opc = MI.getOpcode();
+  int CommutedOpcode = commuteOpcode(Opc);
+  if (CommutedOpcode == -1)
     return nullptr;

-  MachineOperand &Src1 = MI.getOperand(Src1Idx);
+  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
+           static_cast<int>(Src0Idx) &&
+         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
+           static_cast<int>(Src1Idx) &&
+         "inconsistency with findCommutedOpIndices");

-  if (isVOP2(MI) || isVOPC(MI)) {
-    const MCInstrDesc &InstrDesc = MI.getDesc();
-    // For VOP2 and VOPC instructions, any operand type is valid to use for
-    // src0. Make sure we can use the src0 as src1.
-    //
-    // We could be stricter here and only allow commuting if there is a reason
-    // to do so. i.e. if both operands are VGPRs there is no real benefit,
-    // although MachineCSE attempts to find matches by commuting.
-    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
-    if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
-      return nullptr;
-  }
+  MachineOperand &Src0 = MI.getOperand(Src0Idx);
+  MachineOperand &Src1 = MI.getOperand(Src1Idx);

-  MachineInstr *CommutedMI = &MI;
-  if (!Src1.isReg()) {
-    // Allow commuting instructions with Imm operands.
-    if (NewMI || !Src1.isImm() || (!isVOP2(MI) && !isVOP3(MI))) {
-      return nullptr;
-    }
-    // Be sure to copy the source modifiers to the right place.
-    if (MachineOperand *Src0Mods =
-          getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) {
-      MachineOperand *Src1Mods =
-        getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
-
-      int Src0ModsVal = Src0Mods->getImm();
-      if (!Src1Mods && Src0ModsVal != 0)
-        return nullptr;
-
-      // XXX - This assert might be a lie. It might be useful to have a neg
-      // modifier with 0.0.
-      int Src1ModsVal = Src1Mods->getImm();
-      assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
-
-      Src1Mods->setImm(Src0ModsVal);
-      Src0Mods->setImm(Src1ModsVal);
+  MachineInstr *CommutedMI = nullptr;
+  if (Src0.isReg() && Src1.isReg()) {
+    if (isOperandLegal(MI, Src1Idx, &Src0)) {
+      // Be sure to copy the source modifiers to the right place.
+      CommutedMI
+        = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
     }
-    unsigned Reg = Src0.getReg();
-    unsigned SubReg = Src0.getSubReg();
-    if (Src1.isImm())
-      Src0.ChangeToImmediate(Src1.getImm());
-    else
-      llvm_unreachable("Should only have immediates");
-
-    Src1.ChangeToRegister(Reg, false);
-    Src1.setSubReg(SubReg);
+  } else if (Src0.isReg() && !Src1.isReg()) {
+    // src0 should always be able to support any operand type, so no need to
+    // check operand legality.
+    CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
+  } else if (!Src0.isReg() && Src1.isReg()) {
+    if (isOperandLegal(MI, Src1Idx, &Src0))
+      CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
   } else {
-    CommutedMI =
-        TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
+    //llvm_unreachable("Found two non-register operands to commute");
+    // FIXME: Found two non registers to commute. This does happen.
+    return nullptr;
   }

-  if (CommutedMI)
+
+  if (CommutedMI) {
+    swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
+                        Src1, AMDGPU::OpName::src1_modifiers);
+
     CommutedMI->setDesc(get(CommutedOpcode));
+  }

   return CommutedMI;
 }
@@ -1002,8 +999,7 @@
 // TargetInstrInfo::commuteInstruction uses it.
 bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
                                         unsigned &SrcOpIdx1) const {
-  const MCInstrDesc &MCID = MI.getDesc();
-  if (!MCID.isCommutable())
+  if (!MI.isCommutable())
     return false;

   unsigned Opc = MI.getOpcode();
@@ -1011,31 +1007,10 @@
   if (Src0Idx == -1)
     return false;

-  // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
-  // immediate. Also, immediate src0 operand is not handled in
-  // SIInstrInfo::commuteInstruction();
-  if (!MI.getOperand(Src0Idx).isReg())
-    return false;
-
   int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
   if (Src1Idx == -1)
     return false;

-  MachineOperand &Src1 = MI.getOperand(Src1Idx);
-  if (Src1.isImm()) {
-    // SIInstrInfo::commuteInstruction() does support commuting the immediate
-    // operand src1 in 2 and 3 operand instructions.
-    if (!isVOP2(MI.getOpcode()) && !isVOP3(MI.getOpcode()))
-      return false;
-  } else if (Src1.isReg()) {
-    // If any source modifiers are set, the generic instruction commuting won't
-    // understand how to copy the source modifiers.
-    if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
-        hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))
-      return false;
-  } else
-    return false;
-
   return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
 }
Index: test/CodeGen/AMDGPU/addrspacecast.ll
===================================================================
--- test/CodeGen/AMDGPU/addrspacecast.ll
+++ test/CodeGen/AMDGPU/addrspacecast.ll
@@ -11,7 +11,7 @@

 ; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
 ; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]]
+; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1
 ; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
 ; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
@@ -34,7 +34,7 @@

 ; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
 ; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; HSA-DAG: v_cmp_ne_i32_e64 vcc, -1, [[PTR]]
+; HSA-DAG: v_cmp_ne_i32_e64 vcc, [[PTR]], -1
 ; HSA-DAG: v_cndmask_b32_e32 v[[HI:[0-9]+]], 0, [[VAPERTURE]]
 ; HSA-DAG: v_cndmask_b32_e32 v[[LO:[0-9]+]], 0, [[VPTR]]
 ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
@@ -79,7 +79,7 @@
 ; HSA: enable_sgpr_queue_ptr = 0

 ; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
-; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
+; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
 ; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
@@ -96,7 +96,7 @@
 ; HSA: enable_sgpr_queue_ptr = 0

 ; HSA: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}
-; HSA-DAG: v_cmp_ne_i64_e64 vcc, 0, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}
+; HSA-DAG: v_cmp_ne_i64_e64 vcc, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0{{$}}
 ; HSA-DAG: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], s[[PTR_LO]]
 ; HSA-DAG: v_cndmask_b32_e32 [[CASTPTR:v[0-9]+]], -1, v[[VPTR_LO]]
 ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 0{{$}}
Index: test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
===================================================================
--- test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
+++ test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
@@ -5,7 +5,7 @@

 ; Produces error after adding an implicit def to v_cndmask_b32
 ; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
-; GCN: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
+; GCN: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
 ; GCN: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
 ; GCN: v_cndmask_b32_e64 v1, 0, 1, s{{\[[0-9]+:[0-9]+\]}}
 define void @vcc_shrink_vcc_def(float %arg, i32 %arg1, float %arg2, i32 %arg3) {
Index: test/CodeGen/AMDGPU/commute_modifiers.ll
===================================================================
--- test/CodeGen/AMDGPU/commute_modifiers.ll
+++ test/CodeGen/AMDGPU/commute_modifiers.ll
@@ -6,7 +6,7 @@

 ; FUNC-LABEL: @commute_add_imm_fabs_f32
 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, 2.0
 ; SI: buffer_store_dword [[REG]]
 define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -20,7 +20,7 @@

 ; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
-; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -4.0
 ; SI: buffer_store_dword [[REG]]
 define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
@@ -51,7 +51,7 @@

 ; FUNC-LABEL: @commute_add_lit_fabs_f32
 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
-; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[K]], |[[X]]|
 ; SI: buffer_store_dword [[REG]]
 define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
Index: test/CodeGen/AMDGPU/ctlz.ll
===================================================================
--- test/CodeGen/AMDGPU/ctlz.ll
+++ test/CodeGen/AMDGPU/ctlz.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

@@ -19,7 +19,7 @@
 ; FUNC-LABEL: {{^}}s_ctlz_i32:
 ; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
 ; SI-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
-; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], [[VAL]], 0{{$}}
 ; SI-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
 ; SI: buffer_store_dword [[RESULT]]
@@ -112,7 +112,7 @@

 ; FUNC-LABEL: {{^}}s_ctlz_i64:
 ; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
+; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
 ; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
 ; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
 ; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
Index: test/CodeGen/AMDGPU/ctlz_zero_undef.ll
===================================================================
--- test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

@@ -92,7 +92,7 @@

 ; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
 ; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
+; SI-DAG: v_cmp_eq_i32_e64 vcc, s[[HI]], 0{{$}}
 ; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
 ; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
 ; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
Index: test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
===================================================================
--- test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
+++ test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=SI -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

 declare float @llvm.fabs.f32(float) #1
@@ -18,7 +18,7 @@
 }

 ; FUNC-LABEL: {{^}}cvt_flr_i32_f32_1:
-; SI: v_add_f32_e64 [[TMP:v[0-9]+]], 1.0, s{{[0-9]+}}
+; SI: v_add_f32_e64 [[TMP:v[0-9]+]], s{{[0-9]+}}, 1.0
 ; SI-SAFE-NOT: v_cvt_flr_i32_f32
 ; SI-NONAN: v_cvt_flr_i32_f32_e32 v{{[0-9]+}}, [[TMP]]
 ; SI: s_endpgm
Index: test/CodeGen/AMDGPU/fabs.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fabs.f64.ll
+++ test/CodeGen/AMDGPU/fabs.f64.ll
@@ -55,7 +55,7 @@
 ; SI-LABEL: {{^}}fabs_fold_f64:
 ; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-NOT: and
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|
 ; SI: s_endpgm
 define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
   %fabs = call double @llvm.fabs.f64(double %in0)
@@ -67,7 +67,7 @@
 ; SI-LABEL: {{^}}fabs_fn_fold_f64:
 ; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI-NOT: and
-; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|
 ; SI: s_endpgm
 define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
   %fabs = call double @fabs(double %in0)
Index: test/CodeGen/AMDGPU/fabs.ll
===================================================================
--- test/CodeGen/AMDGPU/fabs.ll
+++ test/CodeGen/AMDGPU/fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

@@ -75,7 +75,7 @@
 ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
 ; GCN-NOT: and
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |[[ABS_VALUE]]|
 define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
   %fabs = call float @fabs(float %in0)
   %fmul = fmul float %fabs, %in1
@@ -87,7 +87,7 @@
 ; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
 ; VI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
 ; GCN-NOT: and
-; GCN: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+; GCN: v_mul_f32_e64 v{{[0-9]+}}, v{{[0-9]+}}, |[[ABS_VALUE]]|
 define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
   %fabs = call float @llvm.fabs.f32(float %in0)
   %fmul = fmul float %fabs, %in1
Index: test/CodeGen/AMDGPU/fceil64.ll
===================================================================
--- test/CodeGen/AMDGPU/fceil64.ll
+++ test/CodeGen/AMDGPU/fceil64.ll
@@ -25,7 +25,7 @@
 ; SI-DAG: cmp_lt_i32
 ; SI-DAG: cndmask_b32
 ; SI-DAG: cndmask_b32
-; SI-DAG: v_cmp_lt_f64
+; SI-DAG: v_cmp_gt_f64
 ; SI-DAG: v_cmp_lg_f64
 ; SI-DAG: v_cndmask_b32
 ; SI: v_cndmask_b32
Index: test/CodeGen/AMDGPU/fmaxnum.ll
===================================================================
--- test/CodeGen/AMDGPU/fmaxnum.ll
+++ test/CodeGen/AMDGPU/fmaxnum.ll
@@ -234,7 +234,7 @@
 }

 ; FUNC-LABEL: @fmax_var_immediate_f32
-; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0

 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
@@ -246,7 +246,7 @@
 }

 ; FUNC-LABEL: @fmax_immediate_var_f32
-; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0

 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
Index: test/CodeGen/AMDGPU/fminnum.ll
===================================================================
--- test/CodeGen/AMDGPU/fminnum.ll
+++ test/CodeGen/AMDGPU/fminnum.ll
@@ -233,7 +233,7 @@
 }

 ; FUNC-LABEL: @fmin_var_immediate_f32
-; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0

 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
@@ -244,7 +244,7 @@
 }

 ; FUNC-LABEL: @fmin_immediate_var_f32
-; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+; SI: v_min_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0

 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
Index: test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
===================================================================
--- test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -72,7 +72,7 @@
 }

 ; GCN-LABEL: {{^}}fmul_x2_xn2:
-; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], -4.0, [[X:s[0-9]+]]
+; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], -4.0
 ; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]]
 ; GCN: buffer_store_dword [[RESULT]]
 define void @fmul_x2_xn2(float addrspace(1)* %out, float %x, float %y) #0 {
Index: test/CodeGen/AMDGPU/fneg-fabs.f64.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg-fabs.f64.ll
+++ test/CodeGen/AMDGPU/fneg-fabs.f64.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s

 ; FIXME: Check something here. Currently it seems fabs + fneg aren't
 ; into 2 modifiers, although theoretically that should work.

 ; GCN-LABEL: {{^}}fneg_fabs_fadd_f64:
-; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|
+; GCN: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, -|v{{\[[0-9]+:[0-9]+\]}}|, {{s\[[0-9]+:[0-9]+\]}}
 define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
   %fabs = call double @llvm.fabs.f64(double %x)
   %fsub = fsub double -0.000000e+00, %fabs
@@ -25,7 +25,7 @@
 }

 ; GCN-LABEL: {{^}}fneg_fabs_fmul_f64:
-; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
+; GCN: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|, {{s\[[0-9]+:[0-9]+\]}}
 define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
   %fabs = call double @llvm.fabs.f64(double %x)
   %fsub = fsub double -0.000000e+00, %fabs
Index: test/CodeGen/AMDGPU/fneg-fabs.ll
===================================================================
--- test/CodeGen/AMDGPU/fneg-fabs.ll
+++ test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
 ; SI-NOT: and
-; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+; SI: v_subrev_f32_e64 {{v[0-9]+}}, |{{v[0-9]+}}|, {{s[0-9]+}}
 define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
   %fsub = fsub float -0.000000e+00, %fabs
@@ -15,7 +15,7 @@

 ; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
 ; SI-NOT: and
-; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
+; SI: v_mul_f32_e64 {{v[0-9]+}}, -|{{v[0-9]+}}|, {{s[0-9]+}}
 ; SI-NOT: and
 define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
   %fabs = call float @llvm.fabs.f32(float %x)
Index: test/CodeGen/AMDGPU/fsub64.ll
===================================================================
--- test/CodeGen/AMDGPU/fsub64.ll
+++ test/CodeGen/AMDGPU/fsub64.ll
@@ -39,7 +39,7 @@
 }

 ; SI-LABEL: {{^}}s_fsub_f64:
-; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\], s\[[0-9]+:[0-9]+\]}}
 define void @s_fsub_f64(double addrspace(1)* %out, double %a, double %b) {
   %sub = fsub double %a, %b
   store double %sub, double addrspace(1)* %out
Index: test/CodeGen/AMDGPU/imm.ll
===================================================================
--- test/CodeGen/AMDGPU/imm.ll
+++ test/CodeGen/AMDGPU/imm.ll
@@ -128,7 +128,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0.0
@@ -138,7 +138,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0.5
@@ -148,7 +148,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, -0.5
@@ -158,7 +158,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 1.0
@@ -168,7 +168,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, -1.0
@@ -178,7 +178,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 2.0
@@ -188,7 +188,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, -2.0
@@ -198,7 +198,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 4.0
@@ -208,7 +208,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, -4.0
@@ -240,7 +240,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_1_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 1{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0x36a0000000000000
@@ -250,7 +250,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_2_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}}
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 2{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0x36b0000000000000
@@ -260,7 +260,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_16_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 16
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0x36e0000000000000
@@ -270,7 +270,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -1{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0xffffffffe0000000
@@ -280,7 +280,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -2{{$}}
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0xffffffffc0000000
@@ -290,7 +290,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], -16
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0xfffffffe00000000
@@ -300,7 +300,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_63_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 63
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0x36ff800000000000
@@ -310,7 +310,7 @@

 ; CHECK-LABEL: {{^}}add_inline_imm_64_f32:
 ; CHECK: s_load_dword [[VAL:s[0-9]+]]
-; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], [[VAL]], 64
 ; CHECK: buffer_store_dword [[REG]]
 define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
   %y = fadd float %x, 0x3700000000000000
Index: test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
+++ test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
@@ -7,7 +7,7 @@

 ; FUNC-LABEL: {{^}}clamp_0_1_f32:
 ; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0 clamp{{$}}
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm

@@ -20,7 +20,7 @@

 ; FUNC-LABEL: {{^}}clamp_fabs_0_1_f32:
 ; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, |[[ARG]]| clamp{{$}}
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], |[[ARG]]|, 0 clamp{{$}}
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @clamp_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
@@ -32,7 +32,7 @@

 ; FUNC-LABEL: {{^}}clamp_fneg_0_1_f32:
 ; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -[[ARG]] clamp{{$}}
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], -[[ARG]], 0 clamp{{$}}
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @clamp_fneg_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
@@ -44,7 +44,7 @@

 ; FUNC-LABEL: {{^}}clamp_fneg_fabs_0_1_f32:
 ; SI: s_load_dword [[ARG:s[0-9]+]],
-; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -|[[ARG]]| clamp{{$}}
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], -|[[ARG]]|, 0 clamp{{$}}
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @clamp_fneg_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
Index: test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
@@ -77,7 +77,7 @@
 }

 ; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
-; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
+; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}
 ; SI: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 define void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
   %cmp = icmp eq i32 %i, 0
@@ -110,7 +110,7 @@
 ; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}

 ; SI-DAG: v_cmp_eq_i32_e32 [[CMP0:vcc]], 0, v{{[0-9]+}}
-; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
+; SI-DAG: v_cmp_ne_i32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
 ; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
 ; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]]
 ; SI: s_endpgm
Index: test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.ll
@@ -6,7 +6,7 @@
 declare float @llvm.fabs.f32(float) #0

 ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_with_fabs:
-; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{v[0-9]+}}|, {{s[0-9]+}}
 define void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
   %temp = call float @llvm.fabs.f32(float %a)
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
@@ -15,7 +15,7 @@
 }

 ; GCN-LABEL: {{^}}v_fcmp_f32_oeq_both_operands_with_fabs:
-; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{s[0-9]+}}|, |{{v[0-9]+}}|
+; GCN: v_cmp_eq_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, |{{v[0-9]+}}|, |{{s[0-9]+}}|
 define void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
   %temp = call float @llvm.fabs.f32(float %a)
   %src_input = call float @llvm.fabs.f32(float %src)
Index: test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -27,7 +27,7 @@
 }

 ; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
-; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |v{{[0-9]+}}|
+; GCN: v_mul_legacy_f32_e64 v{{[0-9]+}}, |v{{[0-9]+}}|, |s{{[0-9]+}}|
 define void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 {
   %a.fabs = call float @llvm.fabs.f32(float %a)
   %b.fabs = call float @llvm.fabs.f32(float %b)
Index: test/CodeGen/AMDGPU/llvm.round.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.round.ll
+++ test/CodeGen/AMDGPU/llvm.round.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

@@ -9,7 +9,7 @@
 ; SI-DAG: v_sub_f32_e32 [[SUB:v[0-9]+]], [[SX]], [[TRUNC]]
 ; SI-DAG: v_mov_b32_e32 [[VX:v[0-9]+]], [[SX]]
 ; SI: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[K]], 1.0, [[VX]]
-; SI: v_cmp_le_f32_e64 vcc, 0.5, |[[SUB]]|
+; SI: v_cmp_ge_f32_e64 vcc, |[[SUB]]|, 0.5
 ; SI: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, [[VX]]
 ; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SEL]], [[TRUNC]]
 ; SI: buffer_store_dword [[RESULT]]
Index: test/CodeGen/AMDGPU/madak.ll
===================================================================
--- test/CodeGen/AMDGPU/madak.ll
+++ test/CodeGen/AMDGPU/madak.ll
@@ -151,7 +151,7 @@
 ; GCN-LABEL: {{^}}no_madak_src0_modifier_f32:
 ; GCN: buffer_load_dword [[VA:v[0-9]+]]
 ; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
+; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
 ; GCN: s_endpgm
 define void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -173,7 +173,7 @@
 ; GCN-LABEL: {{^}}no_madak_src1_modifier_f32:
 ; GCN: buffer_load_dword [[VA:v[0-9]+]]
 ; GCN: buffer_load_dword [[VB:v[0-9]+]]
-; GCN: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
+; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
 ; GCN: s_endpgm
 define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
Index: test/CodeGen/AMDGPU/madmk.ll
===================================================================
--- test/CodeGen/AMDGPU/madmk.ll
+++ test/CodeGen/AMDGPU/madmk.ll
@@ -128,7 +128,8 @@
 ; GCN-LABEL: {{^}}no_madmk_src0_modifier_f32:
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x41200000
+; GCN: v_mad_f32 {{v[0-9]+}}, [[VK]], |[[VA]]|, [[VB]]
 define void @no_madmk_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
Index: test/CodeGen/AMDGPU/select-vectors.ll
===================================================================
--- test/CodeGen/AMDGPU/select-vectors.ll
+++ test/CodeGen/AMDGPU/select-vectors.ll
@@ -58,7 +58,7 @@

 ; FUNC-LABEL: {{^}}v_select_v4i32:
 ; SI: buffer_load_dwordx4
-; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
+; SI: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32
 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
@@ -96,7 +96,7 @@
 ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
 ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
 ; SI-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
-; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
+; SI-DAG: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}

 ; SI: v_cndmask_b32_e32
 ; SI: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
@@ -112,7 +112,7 @@
 ; FUNC-LABEL: {{^}}s_select_v4f32:
 ; SI: s_load_dwordx4
 ; SI: s_load_dwordx4
-; SI: v_cmp_eq_i32_e64 vcc, 0, s{{[0-9]+}}
+; SI: v_cmp_eq_i32_e64 vcc, s{{[0-9]+}}, 0{{$}}

 ; SI: v_cndmask_b32_e32
 ; SI: v_cndmask_b32_e32
@@ -129,7 +129,7 @@

 ; FUNC-LABEL: {{^}}v_select_v4f32:
 ; SI: buffer_load_dwordx4
-; SI: v_cmp_gt_u32_e64 vcc, 32, s{{[0-9]+}}
+; SI: v_cmp_lt_u32_e64 vcc, s{{[0-9]+}}, 32
 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
 ; SI: v_cndmask_b32_e32 v{{[0-9]+}}, 0, v{{[0-9]+}}
Index: test/CodeGen/AMDGPU/setcc-opt.ll
===================================================================
--- test/CodeGen/AMDGPU/setcc-opt.ll
+++ test/CodeGen/AMDGPU/setcc-opt.ll
@@ -178,7 +178,7 @@

 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
 ; GCN: s_load_dword [[B:s[0-9]+]]
-; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -1, [[B]]
+; GCN: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
 ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
 ; GCN-NEXT: buffer_store_byte [[RESULT]]
 ; GCN: s_endpgm
Index: test/CodeGen/AMDGPU/setcc.ll
===================================================================
--- test/CodeGen/AMDGPU/setcc.ll
+++ test/CodeGen/AMDGPU/setcc.ll
@@ -391,8 +391,8 @@
 }

 ; FUNC-LABEL: setcc-i1-and-xor
-; SI-DAG: v_cmp_le_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
-; SI-DAG: v_cmp_ge_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], 1.0, s{{[0-9]+}}
+; SI-DAG: v_cmp_ge_f32_e64 [[A:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
+; SI-DAG: v_cmp_le_f32_e64 [[B:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 1.0
 ; SI: s_and_b64 s[2:3], [[A]], [[B]]
 define void @setcc-i1-and-xor(i32 addrspace(1)* %out, float %cond) #0 {
 bb0:
Index: test/CodeGen/AMDGPU/skip-if-dead.ll
===================================================================
--- test/CodeGen/AMDGPU/skip-if-dead.ll
+++ test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -302,7 +302,7 @@
 }

 ; CHECK-LABEL: {{^}}no_skip_no_successors:
-; CHECK: v_cmp_nle_f32
+; CHECK: v_cmp_nge_f32
 ; CHECK: s_and_b64 vcc, exec,
 ; CHECK: s_cbranch_vccz [[SKIPKILL:BB[0-9]+_[0-9]+]]

Index: test/CodeGen/AMDGPU/smrd-vccz-bug.ll
===================================================================
--- test/CodeGen/AMDGPU/smrd-vccz-bug.ll
+++ test/CodeGen/AMDGPU/smrd-vccz-bug.ll
@@ -4,7 +4,7 @@

 ; GCN-FUNC: {{^}}vccz_workaround:
 ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
-; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0
+; GCN: v_cmp_neq_f32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
 ; GCN: s_and_b64 vcc, exec, [[MASK]]
 ; GCN: s_waitcnt lgkmcnt(0)
 ; VCCZ-BUG: s_mov_b64 vcc, vcc
Index: test/CodeGen/AMDGPU/trunc.ll
===================================================================
--- test/CodeGen/AMDGPU/trunc.ll
+++ test/CodeGen/AMDGPU/trunc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s

 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -73,7 +73,7 @@
 ; SI-LABEL: {{^}}s_trunc_i64_to_i1:
 ; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]]
-; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], 1, [[MASKED]]
+; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], [[MASKED]], 1{{$}}
 ; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
 define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
   %trunc = trunc i64 %x to i1
Index: test/CodeGen/AMDGPU/uniform-cfg.ll
===================================================================
--- test/CodeGen/AMDGPU/uniform-cfg.ll
+++ test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -31,7 +31,7 @@
 ; SI-LABEL: {{^}}uniform_if_vcc:
 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
 ; also scheduled the write first.
-; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
+; SI-DAG: v_cmp_eq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
 ; SI-DAG: s_and_b64 vcc, exec, [[COND]]
 ; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
@@ -88,7 +88,7 @@
 ; SI-LABEL: {{^}}uniform_if_swap_br_targets_vcc:
 ; FIXME: We could use _e32 here if we re-used the 0 from [[STORE_VAL]], and
 ; also scheduled the write first.
-; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], 0, s{{[0-9]+}}
+; SI-DAG: v_cmp_neq_f32_e64 [[COND:vcc|s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, 0{{$}}
 ; SI-DAG: s_and_b64 vcc, exec, [[COND]]
 ; SI-DAG: v_mov_b32_e32 [[STORE_VAL:v[0-9]+]], 0
 ; SI: s_cbranch_vccnz [[IF_LABEL:[0-9_A-Za-z]+]]
@@ -253,7 +253,7 @@
 ; SI: s_load_dword [[COND:s[0-9]+]]
 ; SI: s_cmp_lt_i32 [[COND]], 1
 ; SI: s_cbranch_scc1 [[EXIT:[A-Za-z0-9_]+]]
-; SI: v_cmp_lt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], 0, [[COND]]
+; SI: v_cmp_gt_i32_e64 [[MASK:s\[[0-9]+:[0-9]+\]]], [[COND]], 0{{$}}
 ; SI: s_and_b64 vcc, exec, [[MASK]]
 ; SI: s_cbranch_vccnz [[EXIT]]
 ; SI: buffer_store