diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -232,6 +232,11 @@
   }
 }
 
+static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) {
+  unsigned Flag = I.getOperand(OpIdx).getImm();
+  return InlineAsm::getNumOperandRegisters(Flag);
+}
+
 bool InlineAsmLowering::lowerInlineAsm(
     MachineIRBuilder &MIRBuilder, const CallBase &Call,
     std::function<ArrayRef<Register>(const Value &Val)> GetOrCreateVRegs)
@@ -317,6 +322,10 @@
                   .addExternalSymbol(IA->getAsmString().c_str())
                   .addImm(ExtraInfo.get());
 
+  // Starting from this operand: flag followed by register(s) will be added as
+  // operands to Inst for each constraint. Used for matching input constraints.
+  unsigned StartIdx = Inst->getNumOperands();
+
   // Collects the output operands for later processing
   GISelAsmOperandInfoVector OutputOperands;
 
@@ -390,8 +399,31 @@
       break;
     case InlineAsm::isInput: {
       if (OpInfo.isMatchingInputConstraint()) {
-        LLVM_DEBUG(dbgs() << "Tied input operands not supported yet\n");
-        return false;
+        unsigned DefIdx = OpInfo.getMatchedOperand();
+        // Find the operand with the register def that corresponds to DefIdx.
+        unsigned InstFlagIdx = StartIdx;
+        for (unsigned i = 0; i < DefIdx; ++i)
+          InstFlagIdx += getNumOpRegs(*Inst, InstFlagIdx) + 1;
+        assert(getNumOpRegs(*Inst, InstFlagIdx) == 1 && "Wrong flag");
+
+        // We want to tie the input to the register in the next operand.
+        unsigned DefRegIdx = InstFlagIdx + 1;
+        Register Def = Inst->getOperand(DefRegIdx).getReg();
+
+        // Copy the input to a new vreg with the same reg class as Def.
+        const TargetRegisterClass *RC = MRI->getRegClass(Def);
+        ArrayRef<Register> SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal);
+        assert(SrcRegs.size() == 1 && "Single register is expected here");
+        Register Tmp = MRI->createVirtualRegister(RC);
+        MIRBuilder.buildCopy(Tmp, SrcRegs[0]);
+
+        // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def.
+        unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
+        unsigned Flag = InlineAsm::getFlagWordForMatchingOp(UseFlag, DefIdx);
+        Inst.addImm(Flag);
+        Inst.addReg(Tmp);
+        Inst->tieOperands(DefRegIdx, Inst->getNumOperands() - 1);
+        break;
       }
 
       if (OpInfo.ConstraintType == TargetLowering::C_Other &&
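Aside (a standalone sketch, not part of the patch; the function name is illustrative): getFlagWord() packs the operand kind into bits 0-2 and the register count into bits 3-15, while getFlagWordForMatchingOp() sets bit 31 and stores the matched operand number in bits 16-30. Those encodings are exactly where the raw immediates in the MIR checks below come from:

// Sketch only; assumes the flag-word helpers from llvm/IR/InlineAsm.h.
#include "llvm/IR/InlineAsm.h"
#include <cassert>

static void flagWordSketch() {
  using namespace llvm;
  // A plain single-register use: Kind_RegUse (1) in bits 0-2 plus a register
  // count of 1 in bits 3-15 gives 1 | (1 << 3) == 9, printed as
  // "9 /* reguse */" in the irtranslator checks below.
  unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
  assert(UseFlag == 9);

  // Tying it to output operand $0 sets bit 31 and stores the matched operand
  // number in bits 16-30: 0x80000009 == 2147483657, printed as
  // "2147483657 /* reguse tiedto:$0 */".
  unsigned TiedFlag = InlineAsm::getFlagWordForMatchingOp(UseFlag, /*MatchedOperandNo=*/0);
  assert(TiedFlag == 2147483657u);

  // The encoding round-trips: both the matched operand number and the
  // register count (what the getNumOpRegs() helper extracts) are recoverable.
  unsigned DefIdx;
  assert(InlineAsm::isUseOperandTiedToDef(TiedFlag, DefIdx) && DefIdx == 0);
  assert(InlineAsm::getNumOperandRegisters(TiedFlag) == 1);
}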
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -O0 -global-isel -verify-machineinstrs -o - %s | FileCheck %s
+
+define i32 @test_sgpr_reg_class_constraint() nounwind {
+; CHECK-LABEL: test_sgpr_reg_class_constraint:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s5, 8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_add_u32 s4, s4, s5
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind
+  %asm2 = tail call i32 asm "s_add_u32 $0, $1, $2", "=s,s,s"(i32 %asm0, i32 %asm1) nounwind
+  ret i32 %asm2
+}
+
+define i32 @test_sgpr_matching_constraint() nounwind {
+; CHECK-LABEL: test_sgpr_matching_constraint:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s5, 8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_add_u32 s5, s4, s5
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s5
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind
+  %asm2 = tail call i32 asm "s_add_u32 $0, $1, $2", "=s,s,0"(i32 %asm0, i32 %asm1) nounwind
+  ret i32 %asm2
+}
+
+define i32 @test_sgpr_to_vgpr_move_reg_class_constraint() nounwind {
+; CHECK-LABEL: test_sgpr_to_vgpr_move_reg_class_constraint:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    v_mov_b32 v0, s4
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,s"(i32 %asm0) nounwind
+  ret i32 %asm1
+}
+
+define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
+; CHECK-LABEL: test_sgpr_to_vgpr_move_matching_constraint:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    s_mov_b32 s4, 7
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    v_mov_b32 v0, v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind
+  ret i32 %asm1
+}
+
+!0 = !{i32 70}
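Aside (a standalone sketch, not part of the patch; the function name is illustrative): the irtranslator checks below print each flag word as a raw immediate plus a decoded comment (e.g. "1966090 /* regdef:SReg_32 */"). Decoding them with the same llvm/IR/InlineAsm.h helpers; the register-class IDs (27, 29) are AMDGPU's generated IDs at this revision, recovered here from the constants themselves:

// Sketch only: decode the constants that appear in the INLINEASM checks below.
#include "llvm/IR/InlineAsm.h"
#include <cassert>

static void decodeFlagSketch() {
  using namespace llvm;
  // 1966090 == 0x1E000A: Kind_RegDef, one register, and a register-class
  // constraint in bits 16-30 (stored as RC + 1). 0x1E - 1 == 29, which the
  // printer renders as "regdef:SReg_32"; likewise 1835018 == 0x1C000A
  // decodes to RC 27, rendered as "regdef:VGPR_32".
  unsigned DefFlag = 1966090;
  assert(InlineAsm::getKind(DefFlag) == InlineAsm::Kind_RegDef);
  assert(InlineAsm::getNumOperandRegisters(DefFlag) == 1);
  unsigned RC;
  assert(InlineAsm::hasRegClassConstraint(DefFlag, RC) && RC == 29);

  // 2147614729 == 0x80020009: a single-register use tied to output $2,
  // printed as "reguse tiedto:$2" in test_many_matching_constraints.
  unsigned DefIdx;
  assert(InlineAsm::isUseOperandTiedToDef(2147614729u, DefIdx) && DefIdx == 2);
}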
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll
@@ -234,4 +234,96 @@
   ret i32 %1
 }
 
+define i32 @test_vgpr_matching_constraint(i32 %a) nounwind {
+  ; CHECK-LABEL: name: test_vgpr_matching_constraint
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+  ; CHECK:   [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32)
+  ; CHECK:   INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY %4
+  ; CHECK:   $vgpr0 = COPY [[COPY3]](s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+  %and = and i32 %a, 1
+  %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and)
+  ret i32 %asm
+}
+
+define i32 @test_sgpr_matching_constraint() nounwind {
+  ; CHECK-LABEL: name: test_sgpr_matching_constraint
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+  ; CHECK:   INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY %3
+  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32)
+  ; CHECK:   INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 9 /* reguse */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3)
+  ; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY %5
+  ; CHECK:   $vgpr0 = COPY [[COPY5]](s32)
+  ; CHECK:   [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK:   S_SETPC_B64_return [[COPY6]], implicit $vgpr0
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind
+  %asm2 = tail call i32 asm "s_add_u32 $0, $1, $2", "=s,s,0"(i32 %asm0, i32 %asm1) nounwind
+  ret i32 %asm2
+}
+
+define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind {
+  ; CHECK-LABEL: name: test_many_matching_constraints
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK:   [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+  ; CHECK:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32)
+  ; CHECK:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32)
+  ; CHECK:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
+  ; CHECK:   INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5)
+  ; CHECK:   [[COPY7:%[0-9]+]]:_(s32) = COPY %4
+  ; CHECK:   [[COPY8:%[0-9]+]]:_(s32) = COPY %5
+  ; CHECK:   [[COPY9:%[0-9]+]]:_(s32) = COPY %6
+  ; CHECK:   G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+  ; CHECK:   [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
+  ; CHECK:   S_SETPC_B64_return [[COPY10]]
+  %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b)
+  %asmresult0 = extractvalue {i32, i32, i32} %asm, 0
+  store i32 %asmresult0, i32 addrspace(1)* undef
+  %asmresult1 = extractvalue {i32, i32, i32} %asm, 1
+  store i32 %asmresult1, i32 addrspace(1)* undef
+  %asmresult2 = extractvalue {i32, i32, i32} %asm, 2
+  store i32 %asmresult2, i32 addrspace(1)* undef
+  ret void
+}
+
+define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind {
+  ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint
+  ; CHECK: bb.1.entry:
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY %1
+  ; CHECK:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32)
+  ; CHECK:   INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3)
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY %3
+  ; CHECK:   $vgpr0 = COPY [[COPY3]](s32)
+  ; CHECK:   [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK:   S_SETPC_B64_return [[COPY4]], implicit $vgpr0
+entry:
+  %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind
+  %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind
+  ret i32 %asm1
+}
+
 !0 = !{i32 70}
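A note on reading the "(tied-def N)" annotations above: N is the absolute index of the matched output-register operand inside the INLINEASM instruction, which is what Inst->tieOperands() receives as DefRegIdx in the patch. For these tests the index can be computed directly; a hypothetical helper (not in the patch), mirroring the getNumOpRegs() scan:

// Sketch only: valid for the single-register groups used in these tests,
// where each constraint occupies exactly two INLINEASM operands, [flag, reg],
// starting at operand index 2.
static unsigned tiedDefOperandIdx(unsigned DefIdx) {
  const unsigned StartIdx = 2;      // operand 0: asm string, operand 1: ExtraInfo
  return StartIdx + 2 * DefIdx + 1; // skip DefIdx [flag, reg] groups + the flag
}
// DefIdx 0/1/2 -> operand 3/5/7, matching the "(tied-def 3)", "(tied-def 5)"
// and "(tied-def 7)" annotations in test_many_matching_constraints above.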