Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1899,6 +1899,15 @@ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } } + + // Undo the add -> or combine to merge constant offsets from a frame index. + if (N0.getOpcode() == ISD::OR && + isa<FrameIndexSDNode>(N0.getOperand(0)) && + isa<ConstantSDNode>(N0.getOperand(1)) && + DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { + SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); + } } if (SDValue NewSel = foldBinOpIntoSelect(N)) Index: test/CodeGen/AMDGPU/byval-frame-setup.ll =================================================================== --- test/CodeGen/AMDGPU/byval-frame-setup.ll +++ test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -74,46 +74,29 @@ ; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9 ; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13 -; VI-DAG: v_lshrrev_b32_e64 v{{[0-9]+}}, 6 -; CI-DAG: v_lshr_b32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 6 - -; GCN-DAG: v_add_i32_e64 [[FI_ADD0:v[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 8, -; GCN-DAG: v_or_b32_e32 [[FI_OR0:v[0-9]+]], 4, [[FI_ADD0]] - ; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8 ; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24 -; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], [[FI_OR0]], s[0:3], s4 offen offset:4 -; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], [[FI_OR0]], s[0:3], s4 offen offset:8 - -; FIXME: or fails to combine with add, so FI doesn't fold and scratch wave offset is used -; VI-DAG: v_lshrrev_b32_e64 v{{[0-9]+}}, 6 -; CI-DAG: v_lshr_b32_e64 v{{[0-9]+}}, s{{[0-9]+}}, 6 - -; GCN-DAG: v_add_i32_e64 [[FI_ADD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 24, -; GCN-DAG: v_or_b32_e32 [[FI_OR1:v[0-9]+]], 4, [[FI_ADD1]] - -; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8 -; GCN-DAG: buffer_store_dword
[[LOAD1]], off, s[0:3], s32 offset:12 +; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8 +; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12 +; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16 +; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} +; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 +; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 +; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 -; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:8 -; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:12 -; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32{{$}} -; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:4 +; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24 +; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28 +; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32 +; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36 - -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], [[FI_OR1]], s[0:3], s4 offen offset:4 -; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], [[FI_OR1]], s[0:3], s4 offen offset:8 -; GCN: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28 -; GCN: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24 - - -; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:24 -; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:28 -; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:16 -; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 +; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24 +; GCN-DAG: buffer_store_dword [[LOAD7]], off, 
s[0:3], s32 offset:28 ; GCN: s_swappc_b64 ; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}} @@ -152,36 +135,25 @@ ; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}} -; FIXME: Fold offset -; GCN-DAG: v_or_b32_e32 [[OR_FI0:v[0-9]+]], 4, - -; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], [[OR_FI0]], s[0:3], s33 offen offset:4 -; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], [[OR_FI0]], s[0:3], s33 offen offset:8 - -; FIXME: Fold offset -; GCN-DAG: v_or_b32_e32 [[OR_FI1:v[0-9]+]], 4, - -; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12 -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8 - - -; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:8 -; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:12 -; GCN: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:4 -; GCN: buffer_store_dword [[LOAD2]], off, s[0:3], s32{{$}} - - - -; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], [[OR_FI1]], s[0:3], s33 offen offset:4 -; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], [[OR_FI1]], s[0:3], s33 offen offset:8 -; GCN: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28 -; GCN: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24 - - -; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:24 -; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:28 -; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:16 -; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8 +; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12 +; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16 +; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20 + +; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}} +; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4 +; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8 +; GCN-DAG: 
buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12 + +; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24 +; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28 +; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32 +; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36 + +; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16 +; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20 +; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24 +; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28 ; GCN: s_swappc_b64 Index: test/CodeGen/BPF/undef.ll =================================================================== --- test/CodeGen/BPF/undef.ll +++ test/CodeGen/BPF/undef.ll @@ -14,31 +14,29 @@ ; Function Attrs: nounwind uwtable define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 section "socket1" { -; CHECK: r1 = r10 -; CHECK: r1 += -2 -; CHECK: r2 = 0 -; CHECK: *(u16 *)(r1 + 6) = r2 -; CHECK: *(u16 *)(r1 + 4) = r2 -; CHECK: *(u16 *)(r1 + 2) = r2 ; EL: r1 = 134678021 ; EB: r1 = 84281096 ; CHECK: *(u32 *)(r10 - 8) = r1 ; CHECK: r1 = 9 ; CHECK: *(u8 *)(r10 - 4) = r1 -; CHECK: r1 = 10 -; CHECK: *(u8 *)(r10 - 3) = r1 -; CHECK: *(u16 *)(r10 + 24) = r2 -; CHECK: *(u16 *)(r10 + 22) = r2 -; CHECK: *(u16 *)(r10 + 20) = r2 -; CHECK: *(u16 *)(r10 + 18) = r2 -; CHECK: *(u16 *)(r10 + 16) = r2 -; CHECK: *(u16 *)(r10 + 14) = r2 -; CHECK: *(u16 *)(r10 + 12) = r2 -; CHECK: *(u16 *)(r10 + 10) = r2 -; CHECK: *(u16 *)(r10 + 8) = r2 -; CHECK: *(u16 *)(r10 + 6) = r2 -; CHECK: *(u16 *)(r10 - 2) = r2 -; CHECK: *(u16 *)(r10 + 26) = r2 + +; CHECK: r1 = 0 +; CHECK: *(u16 *)(r10 + 24) = r1 +; CHECK: *(u16 *)(r10 + 22) = r1 +; CHECK: *(u16 *)(r10 + 20) = r1 +; CHECK: *(u16 *)(r10 + 18) = r1 +; CHECK: *(u16 *)(r10 + 16) = r1 +; CHECK: *(u16 *)(r10 + 14) = r1 +; CHECK: *(u16 *)(r10 + 12) = r1 +; CHECK: *(u16 *)(r10 + 10) = r1 +; 
CHECK: *(u16 *)(r10 + 8) = r1 +; CHECK: *(u16 *)(r10 + 6) = r1 +; CHECK: *(u16 *)(r10 + 4) = r1 +; CHECK: *(u16 *)(r10 + 2) = r1 +; CHECK: *(u16 *)(r10 + 0) = r1 +; CHECK: *(u16 *)(r10 - 2) = r1 +; CHECK: *(u16 *)(r10 + 26) = r1 + ; CHECK: r2 = r10 ; CHECK: r2 += -8 ; CHECK: r1 = ll