diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1002,12 +1002,12 @@ PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>; let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in -def PseudoBRIND : Pseudo<(outs), (ins GPR:$rs1, simm12:$imm12), []>, +def PseudoBRIND : Pseudo<(outs), (ins GPRJALR:$rs1, simm12:$imm12), []>, PseudoInstExpansion<(JALR X0, GPR:$rs1, simm12:$imm12)>; -def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>; -def : Pat<(brind (add GPR:$rs1, simm12:$imm12)), - (PseudoBRIND GPR:$rs1, simm12:$imm12)>; +def : Pat<(brind GPRJALR:$rs1), (PseudoBRIND GPRJALR:$rs1, 0)>; +def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)), + (PseudoBRIND GPRJALR:$rs1, simm12:$imm12)>; // PseudoCALLReg is a generic pseudo instruction for calls which will eventually // expand to auipc and jalr while encoding, with any given register used as the @@ -1039,8 +1039,8 @@ def : Pat<(riscv_mret_flag), (MRET X0, X0)>; let isCall = 1, Defs = [X1] in -def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1), - [(riscv_call GPR:$rs1)]>, +def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1), + [(riscv_call GPRJALR:$rs1)]>, PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>; let isBarrier = 1, isReturn = 1, isTerminator = 1 in diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -162,6 +162,20 @@ let RegInfos = XLenRI; } +// Don't use X1 or X5 for JALR since that is a hint to pop the return address +// stack on some microarchitectures. Also remove the reserved registers X0, X2, +// X3, and X4 as it reduces the number of register classes that get synthesized +// by tablegen. 
+def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (add + (sequence "X%u", 10, 17), + (sequence "X%u", 6, 7), + (sequence "X%u", 28, 31), + (sequence "X%u", 8, 9), + (sequence "X%u", 18, 27) + )> { + let RegInfos = XLenRI; +} + def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add (sequence "X%u", 10, 15), (sequence "X%u", 8, 9) @@ -171,9 +185,10 @@ // For indirect tail calls, we can't use callee-saved registers, as they are // restored to the saved value before the tail call, which would clobber a call -// address. +// address. We shouldn't use x5 since that is a hint to pop the return +// address stack on some microarchitectures. def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add - (sequence "X%u", 5, 7), + (sequence "X%u", 6, 7), (sequence "X%u", 10, 17), (sequence "X%u", 28, 31) )> { diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll --- a/llvm/test/CodeGen/RISCV/calls.ll +++ b/llvm/test/CodeGen/RISCV/calls.ll @@ -114,6 +114,46 @@ ret i32 %1 } +; Make sure we don't use t0 as the source for jalr as that is a hint to pop the +; return address stack on some microarchitectures. 
+define i32 @test_call_indirect_no_t0(i32 (i32, i32, i32, i32, i32, i32, i32)* %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind { +; RV32I-LABEL: test_call_indirect_no_t0: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t1, a0 +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: mv a4, a5 +; RV32I-NEXT: mv a5, a6 +; RV32I-NEXT: mv a6, a7 +; RV32I-NEXT: jalr t1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32I-PIC-LABEL: test_call_indirect_no_t0: +; RV32I-PIC: # %bb.0: +; RV32I-PIC-NEXT: addi sp, sp, -16 +; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-PIC-NEXT: mv t1, a0 +; RV32I-PIC-NEXT: mv a0, a1 +; RV32I-PIC-NEXT: mv a1, a2 +; RV32I-PIC-NEXT: mv a2, a3 +; RV32I-PIC-NEXT: mv a3, a4 +; RV32I-PIC-NEXT: mv a4, a5 +; RV32I-PIC-NEXT: mv a5, a6 +; RV32I-PIC-NEXT: mv a6, a7 +; RV32I-PIC-NEXT: jalr t1 +; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-PIC-NEXT: addi sp, sp, 16 +; RV32I-PIC-NEXT: ret + %1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) + ret i32 %1 +} + ; Ensure that calls to fastcc functions aren't rejected. Such calls may be ; introduced when compiling with optimisation. diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -58,6 +58,24 @@ ret void } +; Make sure we don't use t0 as the source for jr as that is a hint to pop the +; return address stack on some microarchitectures. 
+define i32 @caller_indirect_no_t0(i32 (i32, i32, i32, i32, i32, i32, i32)* %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) { +; CHECK-LABEL: caller_indirect_no_t0: +; CHECK: # %bb.0: +; CHECK-NEXT: mv t1, a0 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: mv a3, a4 +; CHECK-NEXT: mv a4, a5 +; CHECK-NEXT: mv a5, a6 +; CHECK-NEXT: mv a6, a7 +; CHECK-NEXT: jr t1 + %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) + ret i32 %9 +} + ; Do not tail call optimize functions with varargs passed by stack. declare i32 @callee_varargs(i32, ...) define void @caller_varargs(i32 %a, i32 %b) nounwind {