Index: llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -4516,6 +4516,19 @@
   return SDValue();
 }
 
+/// PerformStoreRetvalCombine - Drop a StoreRetval/StoreRetvalV2/StoreRetvalV4
+/// node whose stored values are all undef by replacing it with its chain.
+static SDValue PerformStoreRetvalCombine(SDNode *N) {
+  // Operands 0 and 1 are skipped; every remaining operand is a stored value.
+  for (const SDUse &StoredValue : N->ops().drop_front(2))
+    if (!StoredValue.get().isUndef())
+      return SDValue();
+
+  // All stored values are undef: hand back the incoming chain (operand 0).
+  // EntryToken is not an option, it would leave that chain unused and dropped.
+  return N->getOperand(0);
+}
+
 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
 ///
 static SDValue PerformADDCombine(SDNode *N,
@@ -4844,6 +4857,10 @@
     return PerformREMCombine(N, DCI, OptLevel);
   case ISD::SETCC:
     return PerformSETCCCombine(N, DCI);
+  case NVPTXISD::StoreRetval:
+  case NVPTXISD::StoreRetvalV2:
+  case NVPTXISD::StoreRetvalV4:
+    return PerformStoreRetvalCombine(N);
   }
   return SDValue();
 }
Index: llvm/test/CodeGen/NVPTX/store-retval.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/store-retval.ll
@@ -0,0 +1,111 @@
+; RUN: llc < %s --mtriple=nvptx-unknown-unknown | FileCheck %s
+;
+; This is IR generated with clang using -O3 optimization level
+; and nvptx-unknown-unknown target from the following C code.
+; +; struct StNoalign { unsigned int field[73]; }; +; struct StAlign8 { _Alignas(8) unsigned int field[73]; }; +; struct StAlign16 { _Alignas(16) unsigned int field[73]; }; +; +; #define DECLARE_FUNC(StName) \ +; struct StName func_##StName(struct StName in) { \ +; struct StName ret; \ +; ret.field[8] = in.field[12]; \ +; ret.field[9] = in.field[13]; \ +; ret.field[10] = in.field[14]; \ +; ret.field[11] = in.field[15]; \ +; ret.field[27] = 0; \ +; return ret; \ +; } \ +; +; DECLARE_FUNC(StNoalign) +; DECLARE_FUNC(StAlign8) +; DECLARE_FUNC(StAlign16) + +%struct.StNoalign = type { [73 x i32] } + +define %struct.StNoalign @func_StNoalign(%struct.StNoalign* nocapture noundef readonly byval(%struct.StNoalign) align 4 %in) { + ; CHECK-LABEL: .func{{.*}}func_StNoalign + ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [func_StNoalign_param_0+48]; + ; CHECK: ld.param.u32 [[B:%r[0-9]+]], [func_StNoalign_param_0+52]; + ; CHECK: ld.param.u32 [[C:%r[0-9]+]], [func_StNoalign_param_0+56]; + ; CHECK: ld.param.u32 [[D:%r[0-9]+]], [func_StNoalign_param_0+60]; + ; CHECK: st.param.b32 [func_retval0+32], [[A]]; + ; CHECK: st.param.b32 [func_retval0+36], [[B]]; + ; CHECK: st.param.b32 [func_retval0+40], [[C]]; + ; CHECK: st.param.b32 [func_retval0+44], [[D]]; + ; CHECK: mov.u32 [[E:%r[0-9]+]], 0; + ; CHECK: st.param.b32 [func_retval0+108], [[E]]; + ; CHECK-NEXT: ret; + %arrayidx = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 12 + %1 = load i32, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 13 + %2 = load i32, i32* %arrayidx4, align 4 + %arrayidx8 = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 14 + %3 = load i32, i32* %arrayidx8, align 4 + %arrayidx12 = getelementptr inbounds %struct.StNoalign, %struct.StNoalign* %in, i32 0, i32 0, i32 15 + %4 = load i32, i32* %arrayidx12, align 4 + %.fca.0.8.insert = insertvalue %struct.StNoalign { [73 x i32] 
[i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison] }, i32 %1, 0, 8 + %.fca.0.9.insert = insertvalue %struct.StNoalign %.fca.0.8.insert, i32 %2, 0, 9 + %.fca.0.10.insert = insertvalue %struct.StNoalign %.fca.0.9.insert, i32 %3, 0, 10 + %.fca.0.11.insert = insertvalue %struct.StNoalign %.fca.0.10.insert, i32 %4, 0, 11 + %.fca.0.27.insert = insertvalue %struct.StNoalign %.fca.0.11.insert, i32 0, 0, 27 + ret %struct.StNoalign %.fca.0.27.insert +} + +%struct.StAlign8 = type { [73 x i32], [4 x i8] } + +define %struct.StAlign8 @func_StAlign8(%struct.StAlign8* nocapture noundef readonly byval(%struct.StAlign8) align 8 %in) { + ; CHECK-LABEL: .func{{.*}}func_StAlign8 + ; CHECK: ld.param.v2.u32 {[[A:%r[0-9]+]], [[B:%r[0-9]+]]}, [func_StAlign8_param_0+48]; + ; CHECK: ld.param.v2.u32 {[[C:%r[0-9]+]], [[D:%r[0-9]+]]}, [func_StAlign8_param_0+56]; + ; CHECK: st.param.b32 [func_retval0+32], [[A]]; + ; CHECK: st.param.b32 [func_retval0+36], [[B]]; + ; CHECK: st.param.b32 [func_retval0+40], [[C]]; + ; CHECK: st.param.b32 [func_retval0+44], [[D]]; + ; CHECK: mov.u32 [[E:%r[0-9]+]], 0; + ; CHECK: st.param.b32 [func_retval0+108], [[E]]; + ; CHECK-NEXT: ret; + %arrayidx = 
getelementptr inbounds %struct.StAlign8, %struct.StAlign8* %in, i32 0, i32 0, i32 12 + %1 = load i32, i32* %arrayidx, align 8 + %arrayidx4 = getelementptr inbounds %struct.StAlign8, %struct.StAlign8* %in, i32 0, i32 0, i32 13 + %2 = load i32, i32* %arrayidx4, align 4 + %arrayidx8 = getelementptr inbounds %struct.StAlign8, %struct.StAlign8* %in, i32 0, i32 0, i32 14 + %3 = load i32, i32* %arrayidx8, align 8 + %arrayidx12 = getelementptr inbounds %struct.StAlign8, %struct.StAlign8* %in, i32 0, i32 0, i32 15 + %4 = load i32, i32* %arrayidx12, align 4 + %.fca.0.8.insert = insertvalue %struct.StAlign8 { [73 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison], [4 x i8] poison }, i32 %1, 0, 8 + %.fca.0.9.insert = insertvalue %struct.StAlign8 %.fca.0.8.insert, i32 %2, 0, 9 + %.fca.0.10.insert = insertvalue %struct.StAlign8 %.fca.0.9.insert, i32 %3, 0, 10 + %.fca.0.11.insert = insertvalue %struct.StAlign8 %.fca.0.10.insert, i32 %4, 0, 11 + %.fca.0.27.insert = insertvalue %struct.StAlign8 %.fca.0.11.insert, i32 0, 0, 27 + ret %struct.StAlign8 %.fca.0.27.insert +} + +%struct.StAlign16 = type { [73 x i32], [12 x i8] } + +define %struct.StAlign16 
@func_StAlign16(%struct.StAlign16* nocapture noundef readonly byval(%struct.StAlign16) align 16 %in) { + ; CHECK-LABEL: .func{{.*}}func_StAlign16 + ; CHECK: ld.param.v4.u32 {[[A:%r[0-9]+]], [[B:%r[0-9]+]], [[C:%r[0-9]+]], [[D:%r[0-9]+]]}, [func_StAlign16_param_0+48]; + ; CHECK: st.param.b32 [func_retval0+32], [[A]]; + ; CHECK: st.param.b32 [func_retval0+36], [[B]]; + ; CHECK: st.param.b32 [func_retval0+40], [[C]]; + ; CHECK: st.param.b32 [func_retval0+44], [[D]]; + ; CHECK: mov.u32 [[E:%r[0-9]+]], 0; + ; CHECK: st.param.b32 [func_retval0+108], [[E]]; + ; CHECK-NEXT: ret; + %arrayidx = getelementptr inbounds %struct.StAlign16, %struct.StAlign16* %in, i32 0, i32 0, i32 12 + %1 = load i32, i32* %arrayidx, align 16 + %arrayidx4 = getelementptr inbounds %struct.StAlign16, %struct.StAlign16* %in, i32 0, i32 0, i32 13 + %2 = load i32, i32* %arrayidx4, align 4 + %arrayidx8 = getelementptr inbounds %struct.StAlign16, %struct.StAlign16* %in, i32 0, i32 0, i32 14 + %3 = load i32, i32* %arrayidx8, align 8 + %arrayidx12 = getelementptr inbounds %struct.StAlign16, %struct.StAlign16* %in, i32 0, i32 0, i32 15 + %4 = load i32, i32* %arrayidx12, align 4 + %.fca.0.8.insert = insertvalue %struct.StAlign16 { [73 x i32] [i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison], [12 x i8] poison }, i32 %1, 0, 8 + %.fca.0.9.insert = insertvalue %struct.StAlign16 %.fca.0.8.insert, i32 %2, 0, 9 + %.fca.0.10.insert = insertvalue %struct.StAlign16 %.fca.0.9.insert, i32 %3, 0, 10 + %.fca.0.11.insert = insertvalue %struct.StAlign16 %.fca.0.10.insert, i32 %4, 0, 11 + %.fca.0.27.insert = insertvalue %struct.StAlign16 %.fca.0.11.insert, i32 0, 0, 27 + ret %struct.StAlign16 %.fca.0.27.insert +}