Index: llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -449,7 +449,7 @@
              STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
     O << "0.15915494309189532";
   else {
-    assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
+    assert(isInt<32>(Imm) || isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
 
     // In rare situations, we will have a 32-bit literal in a 64-bit
     // operand. This is technically allowed for the encoding of s_mov_b64.
Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -105,6 +105,7 @@
 
   std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
   bool tryFoldOMod(MachineInstr &MI);
+  bool tryFoldRegSequence(MachineInstr &MI);
 
 public:
   SIFoldOperands() : MachineFunctionPass(ID) {
@@ -1463,6 +1464,35 @@
   return true;
 }
 
+// Try to fold a 64-bit immediate reg_sequence into its uses.
+bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
+  assert(MI.isRegSequence());
+  auto Reg = MI.getOperand(0).getReg();
+  SmallVector<std::pair<MachineOperand *, unsigned>, 32> Defs;
+
+  if (Reg.isPhysical())
+    return false;
+
+  if (TII->getOpSize(MI, 0) != 8 ||
+      !getRegSeqInit(Defs, Reg, AMDGPU::OPERAND_REG_IMM_INT32, TII, *MRI))
+    return false;
+
+  assert(Defs.size() == 2);
+  if (!Defs[0].first->isImm() || !Defs[1].first->isImm())
+    return false;
+
+  uint64_t Lit = ((uint64_t)(Defs[0].first->getImm() & 0xffffffff) <<
+                  (Defs[0].second == AMDGPU::sub0 ? 0 : 32)) |
+                 ((uint64_t)(Defs[1].first->getImm() & 0xffffffff) <<
+                  (Defs[1].second == AMDGPU::sub0 ? 0 : 32));
+
+  MI.addOperand(MachineOperand::CreateImm(Lit));
+  foldInstOperand(MI, MI.getOperand(MI.getNumOperands() - 1));
+  MI.RemoveOperand(MI.getNumOperands() - 1);
+
+  return true;
+}
+
 bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -1490,6 +1520,9 @@
 
       tryFoldInst(TII, &MI);
 
+      if (MI.isRegSequence() && tryFoldRegSequence(MI))
+        continue;
+
       if (!TII->isFoldableCopy(MI)) {
         // Saw an unknown clobber of m0, so we no longer know what it is.
         if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
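A standalone sketch of the fold above, outside the pass (an assumption: plain
C++ mirroring the Defs[i] logic, not LLVM code). Each 32-bit immediate is
masked and shifted into the half of the 64-bit literal selected by its
subregister index, so the two defs may arrive in either order:

    #include <cstdint>

    // Val0IsSub0/Val1IsSub0 stand in for the Defs[i].second == AMDGPU::sub0
    // checks in tryFoldRegSequence above.
    uint64_t packRegSequenceImm(int64_t Val0, bool Val0IsSub0,
                                int64_t Val1, bool Val1IsSub0) {
      uint64_t Half0 = (uint64_t)(Val0 & 0xffffffff) << (Val0IsSub0 ? 0 : 32);
      uint64_t Half1 = (uint64_t)(Val1 & 0xffffffff) << (Val1IsSub0 ? 0 : 32);
      return Half0 | Half1;
    }

For example, packRegSequenceImm(0xc0800000, true, -1, false) yields
0xffffffffc0800000, the literal the inline-constraints tests below expect.
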
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2850,7 +2850,8 @@
   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP32: {
     int32_t Trunc = static_cast<int32_t>(Imm);
-    return AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
+    return (isInt<32>(Imm) || isUInt<32>(Imm)) &&
+           AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
   }
   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -2950,6 +2951,12 @@
   if (!RI.opCanUseLiteralConstant(OpInfo.OperandType))
     return false;
 
+  if (MO.isImm()) {
+    int64_t Imm = MO.getImm();
+    if (!isInt<32>(Imm) && !isUInt<32>(Imm))
+      return false;
+  }
+
   if (!isVOP3(MI) || !AMDGPU::isSISrcOperand(InstDesc, OpNo))
     return true;
 
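The new range checks matter because the OPERAND_REG_INLINE_AC_INT32 path
truncates before classifying. A standalone sketch of the failure mode they
close (an assumption: plain C++, with isInlinableLiteral32 reduced to the
small-integer inline range purely for illustration):

    #include <cassert>
    #include <cstdint>

    bool isInlinableAfterTrunc(int64_t Imm) {
      int32_t Trunc = (int32_t)Imm; // drops bits [63:32]
      bool Fits32 = (Imm >= INT32_MIN && Imm <= INT32_MAX) || // isInt<32>
                    (Imm >= 0 && Imm <= (int64_t)UINT32_MAX); // isUInt<32>
      bool Inlinable32 = Trunc >= -16 && Trunc <= 64; // stand-in check
      return Fits32 && Inlinable32;
    }

    int main() {
      assert(isInlinableAfterTrunc(1));            // genuinely inlinable
      assert(!isInlinableAfterTrunc(0x100000000)); // truncates to 0; must
      return 0;                                    // not be reported inline
    }
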
Index: llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
+++ llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
@@ -179,10 +179,8 @@
 ; GCN-NEXT:    s_and_b64 vcc, exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_vccz BB4_2
 ; GCN-NEXT:  ; %bb.1:
-; GCN-NEXT:    s_mov_b32 s4, 0
-; GCN-NEXT:    s_mov_b32 s5, s4
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NEXT:    v_mov_b32_e32 v1, s5
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    s_branch BB4_3
 ; GCN-NEXT:  BB4_2: ; %if.else
 ; GCN-NEXT:    s_getpc_b64 s[4:5]
@@ -223,10 +221,8 @@
 ; GCN-NEXT:    s_and_b64 vcc, exec, s[4:5]
 ; GCN-NEXT:    s_cbranch_vccz BB5_2
 ; GCN-NEXT:  ; %bb.1:
-; GCN-NEXT:    s_mov_b32 s4, 0
-; GCN-NEXT:    s_mov_b32 s5, s4
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NEXT:    v_mov_b32_e32 v1, s5
+; GCN-NEXT:    v_mov_b32_e32 v0, 0
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NEXT:    s_branch BB5_3
 ; GCN-NEXT:  BB5_2: ; %if.else
 ; GCN-NEXT:    s_getpc_b64 s[4:5]
Index: llvm/test/CodeGen/AMDGPU/inline-constraints.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/inline-constraints.ll
+++ llvm/test/CodeGen/AMDGPU/inline-constraints.ll
@@ -58,8 +58,7 @@
 
 ; FIXME: Should be able to use s_mov_b64
 ; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64:
-; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4{{$}}
-; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN-DAG: s_mov_b64 s{{\[}}[[REG_LO:[0-9]+]]:[[REG_HI:[0-9]+]]], -4{{$}}
 ; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
 define amdgpu_kernel void @inline_sreg_constraint_imm_i64() {
   tail call void asm sideeffect "; use $0", "s"(i64 -4)
@@ -74,3 +73,120 @@
   tail call void asm sideeffect "; use $0", "s"(double 1.0)
   ret void
 }
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m4:
+; GCN-DAG: s_mov_b64 s{{\[}}[[REG_LO:[0-9]+]]:[[REG_HI:[0-9]+]]], -4{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 -4)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_4_0:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 4.0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_4_0() {
+  tail call void asm sideeffect "; use $0", "s"(i64 4647714815446351872)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m4_0:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -4.0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m4_0() {
+  tail call void asm sideeffect "; use $0", "s"(i64 13871086852301127680)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_1:
+; GCN-DAG: s_mov_b64 s{{\[}}[[REG_LO:[0-9]+]]:[[REG_HI:[0-9]+]]], 1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_1() {
+  tail call void asm sideeffect "; use $0", "s"(i64 1)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_4_m1:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -1{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 4.0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_4_m1() {
+  tail call void asm sideeffect "; use $0", "s"(i64 4647714819741319167)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m1_4:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 4.0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m1_4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 18446744070496714752)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m1_m4:
+; GCN-DAG: s_mov_b64 s{{\[}}[[REG_LO:[0-9]+]]:[[REG_HI:[0-9]+]]], 0xffffffffc0800000{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m1_m4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 18446744072644198400)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_1_4:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 4.0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_1_4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 5377097728)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_1_m4:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4.0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_1_m4() {
+  tail call void asm sideeffect "; use $0", "s"(i64 7524581376)
+  ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_100:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0x42c80000{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_100() {
+  tail call void asm sideeffect "; use $0", "s"(i64 1120403456)
+  ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m100:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0xc2c80000{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m100() {
+  tail call void asm sideeffect "; use $0", "s"(i64 3267887104)
+  ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_m1_m100:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0xc2c80000{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_m1_m100() {
+  tail call void asm sideeffect "; use $0", "s"(i64 18446744072682471424)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64_1_m100:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0xc2c80000{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define amdgpu_kernel void @inline_sreg_constraint_imm_i64_1_m100() {
+  tail call void asm sideeffect "; use $0", "s"(i64 7562854400)
+  ret void
+}
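The constants in these tests are chosen so each 32-bit half is interesting on
its own (an f32 inline value, 0, 1, or -1). A sketch that decomposes them the
way the checks do (an assumption: plain C++, not part of the patch):

    #include <cstdint>
    #include <cstdio>

    // Prints the halves the two s_mov_b32 would materialize, and whether the
    // value sign-extends from 32 bits, the case a single s_mov_b64 with a
    // 32-bit literal can cover.
    void describe(uint64_t Imm) {
      uint32_t Lo = (uint32_t)Imm;
      uint32_t Hi = (uint32_t)(Imm >> 32);
      bool SExt32 = (int64_t)Imm == (int64_t)(int32_t)Lo; // isInt<32>
      printf("0x%016llx: lo=0x%08x hi=0x%08x sext32=%d\n",
             (unsigned long long)Imm, Lo, Hi, SExt32);
    }

    // describe(18446744072644198400u): lo=0xc0800000 hi=0xffffffff sext32=1,
    //   hence the single s_mov_b64 in inline_sreg_constraint_imm_i64_m1_m4.
    // describe(4647714815446351872):   lo=0x00000000 hi=0x40800000 sext32=0,
    //   hence the s_mov_b32 pair in inline_sreg_constraint_imm_i64_4_0.
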
Index: llvm/test/CodeGen/AMDGPU/shl.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/shl.ll
+++ llvm/test/CodeGen/AMDGPU/shl.ll
@@ -426,9 +426,7 @@
 ; low 32-bits, which is not a valid 64-bit inline immediate.
 
 ; FUNC-LABEL: {{^}}s_shl_inline_imm_f32_4.0_i64:
-; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0
-; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}}
-; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 0x40800000, s{{[0-9]+}}
 define amdgpu_kernel void @s_shl_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %shl = shl i64 1082130432, %a
   store i64 %shl, i64 addrspace(1)* %out, align 8
@@ -437,10 +435,7 @@
 
 ; FIXME: Copy of -1 register
 ; FUNC-LABEL: {{^}}s_shl_inline_imm_f32_neg_4.0_i64:
-; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0
-; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}}
-; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]]
-; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}}, s{{[0-9]+}}
+; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 0xffffffffc0800000, s{{[0-9]+}}
 define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
   %shl = shl i64 -1065353216, %a
   store i64 %shl, i64 addrspace(1)* %out, align 8
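
Net effect on the shl cases, with the assembly in the comments paraphrased
from the FileCheck patterns above rather than from generated output (a
sketch, not verified codegen):

    // before: s_mov_b32  s[K_LO], 4.0        ; build the 64-bit constant
    //         s_mov_b32  s[K_HI], 0
    //         s_lshl_b64 s[...], s[K_LO:K_HI], s[...]
    // after:  s_lshl_b64 s[...], 0x40800000, s[...]

The fold is sound because 1082130432 (0x40800000, the bit pattern of 4.0f)
passes the isUInt<32> side of the new checks, while -1065353216 prints as
0xffffffffc0800000 because it passes the isInt<32> side, matching the widened
assert in AMDGPUInstPrinter.cpp.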