diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -391,10 +391,12 @@
         Inst.addReg(SourceRegs[0]);
       } else {
         // Otherwise, this outputs to a register (directly for C_Register /
-        // C_RegisterClass. Find a register that we can use.
+        // C_RegisterClass / C_Other).
         assert(OpInfo.ConstraintType == TargetLowering::C_Register ||
-               OpInfo.ConstraintType == TargetLowering::C_RegisterClass);
+               OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+               OpInfo.ConstraintType == TargetLowering::C_Other);
 
+        // Find a register that we can use.
         if (OpInfo.Regs.empty()) {
           LLVM_DEBUG(dbgs()
                      << "Couldn't allocate output register for constraint\n");
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1165,6 +1165,12 @@
     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
   }
 
+  /// Handle lowering of flag assembly outputs.
+  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+                                      const SDLoc &DL,
+                                      const AsmOperandInfo &Constraint,
+                                      SelectionDAG &DAG) const override;
+
   bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
   bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
   bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -9864,6 +9864,72 @@
   return P;
 }
 
+// The set of cc codes supported is from
+// https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
+static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint) {
+  AArch64CC::CondCode Cond = StringSwitch<AArch64CC::CondCode>(Constraint)
+                                 .Case("{@cchi}", AArch64CC::HI)
+                                 .Case("{@cccs}", AArch64CC::HS)
+                                 .Case("{@cclo}", AArch64CC::LO)
+                                 .Case("{@ccls}", AArch64CC::LS)
+                                 .Case("{@cccc}", AArch64CC::LO)
+                                 .Case("{@cceq}", AArch64CC::EQ)
+                                 .Case("{@ccgt}", AArch64CC::GT)
+                                 .Case("{@ccge}", AArch64CC::GE)
+                                 .Case("{@cclt}", AArch64CC::LT)
+                                 .Case("{@ccle}", AArch64CC::LE)
+                                 .Case("{@cchs}", AArch64CC::HS)
+                                 .Case("{@ccne}", AArch64CC::NE)
+                                 .Case("{@ccvc}", AArch64CC::VC)
+                                 .Case("{@ccpl}", AArch64CC::PL)
+                                 .Case("{@ccvs}", AArch64CC::VS)
+                                 .Case("{@ccmi}", AArch64CC::MI)
+                                 .Default(AArch64CC::Invalid);
+  return Cond;
+}
+
+/// Helper function to create 'CSET', which is equivalent to
+/// 'CSINC <Wd>, WZR, WZR, invert(<cond>)'.
+static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL,
+                        SelectionDAG &DAG) {
+  return DAG.getNode(
+      AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
+      DAG.getConstant(0, DL, MVT::i32),
+      DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32), NZCV);
+}
+
+// Lower @cc flag output via getSETCC.
+SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
+    SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+  AArch64CC::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
+  if (Cond == AArch64CC::Invalid)
+    return SDValue();
+  // The output variable should be a scalar integer.
+ if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() || + OpInfo.ConstraintVT.getSizeInBits() < 8) + report_fatal_error("Flag output operand is of invalid type"); + + // Get NZCV register. Only update chain when copyfrom is glued. + if (Glue.getNode()) { + Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32, Glue); + Chain = Glue.getValue(1); + } else + Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32); + // Extract CC code. + SDValue CC = getSETCC(Cond, Glue, DL, DAG); + + SDValue Result; + + // Truncate or ZERO_EXTEND based on value types. + if (OpInfo.ConstraintVT.getSizeInBits() <= 32) + Result = DAG.getNode(ISD::TRUNCATE, DL, OpInfo.ConstraintVT, CC); + else + Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC); + + return Result; +} + /// getConstraintType - Given a constraint letter, return the type of /// constraint it is for this target. AArch64TargetLowering::ConstraintType @@ -9896,6 +9962,8 @@ } else if (parsePredicateConstraint(Constraint) != PredicateConstraint::Invalid) return C_RegisterClass; + else if (parseConstraintCode(Constraint) != AArch64CC::Invalid) + return C_Other; return TargetLowering::getConstraintType(Constraint); } @@ -9993,7 +10061,8 @@ : std::make_pair(0U, &AArch64::PPRRegClass); } } - if (StringRef("{cc}").equals_insensitive(Constraint)) + if (StringRef("{cc}").equals_insensitive(Constraint) || + parseConstraintCode(Constraint) != AArch64CC::Invalid) return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass); // Use the default implementation in TargetLowering to convert the register diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -154,6 +154,20 @@ ret i32 %ret } +declare void @llvm.assume(i1) + +; FALLBACK-WITH-REPORT-ERR: :0:0: unable to translate instruction: call: ' %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a)' (in function: inline_asm_with_output_constraint) +; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for inline_asm_with_output_constraint +; FALLBACK-WITH-REPORT-OUT-LABEL: inline_asm_with_output_constraint +define i32 @inline_asm_with_output_constraint(i64 %a) { +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + attributes #1 = { "target-features"="+sve" } attributes #2 = { "target-features"="+ls64" } diff --git a/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll b/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/inline-asm-flag-output.ll @@ -0,0 +1,259 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s +define i32 @test_cchi(i64 %a) { +; CHECK-LABEL: test_cchi: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call 
void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_cccs(i64 %a) { +; CHECK-LABEL: test_cccs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, hs +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cccs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_cclo(i64 %a) { +; CHECK-LABEL: test_cclo: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cclo},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccls(i64 %a) { +; CHECK-LABEL: test_ccls: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccls},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_cccc(i64 %a) { +; CHECK-LABEL: test_cccc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cccc},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_cceq(i64 %a) { +; CHECK-LABEL: test_cceq: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cceq},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccgt(i64 %a) { +; CHECK-LABEL: test_ccgt: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, gt +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccgt},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccge(i64 %a) { +; CHECK-LABEL: test_ccge: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, ge +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccge},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_cclt(i64 %a) { +; CHECK-LABEL: test_cclt: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, lt +; 
CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cclt},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccle(i64 %a) { +; CHECK-LABEL: test_ccle: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, le +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccle},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_cchs(i64 %a) { +; CHECK-LABEL: test_cchs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, hs +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@cchs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccne(i64 %a) { +; CHECK-LABEL: test_ccne: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccne},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccvc(i64 %a) { +; CHECK-LABEL: test_ccvc: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, vc +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccvc},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccpl(i64 %a) { +; CHECK-LABEL: test_ccpl: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, pl +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccpl},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccvs(i64 %a) { +; CHECK-LABEL: test_ccvs: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, vs +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccvs},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + ret i32 %asmresult1 +} + +define i32 @test_ccmi(i64 %a) { +; CHECK-LABEL: test_ccmi: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: //APP +; CHECK-NEXT: subs x0, x0, #3 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: cset w0, mi +; CHECK-NEXT: ret +entry: + %0 = tail call { i64, i32 } asm "subs $0, $0, #3", "=r,={@ccmi},0,~{dirflag},~{fpsr},~{flags}"(i64 %a) + %asmresult1 = extractvalue { i64, i32 } %0, 1 + %1 = icmp ult i32 %asmresult1, 2 + tail call void @llvm.assume(i1 %1) + 
ret i32 %asmresult1 +} + +declare void @llvm.assume(i1)
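
Note on usage (not part of the patch): the IR constraint strings exercised by the tests above, e.g. "={@cchi}", are what Clang emits for GCC-style flag-output operands, so a C-level sketch of what this patch enables might look as follows. This is illustrative only: the function name is made up, and it assumes a compiler that accepts "=@cc<cond>" flag outputs for AArch64 and lowers them through the new LowerAsmOutputForConstraint hook.

  /* Returns 1 when (a - 3) leaves the "higher" (HI) condition set, else 0. */
  static int sub3_is_higher(unsigned long a) {
    int hi; /* receives the HI flag as 0 or 1 via the flag-output operand */
    __asm__("subs %0, %0, #3"
            : "+r"(a), "=@cchi"(hi)); /* maps to "=r,={@cchi},0" in the IR */
    return hi; /* expected to lower to a CSET on HI, as in the CHECK lines above */
  }

Because CS/HS and CC/LO are alias spellings of the same AArch64 condition codes, parseConstraintCode maps both {@cccs} and {@cchs} to HS and {@cccc} to LO, which is why test_cccs/test_cchs and test_cclo/test_cccc check the same cset conditions.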