diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3762,6 +3762,14 @@ } } + // If there's no chance any bit will need to borrow from an adjacent bit: + // sub C, X --> xor X, C + if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) { + if (!C0->isOpaque() && + (~DAG.computeKnownBits(N1).Zero).isSubsetOf(C0->getAPIntValue())) + return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + } + return SDValue(); } diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1531,44 +1531,6 @@ (ADD32ri GR32:$src1, -2147483648)>; } -//===----------------------------------------------------------------------===// -// Pattern match SUB as XOR -//===----------------------------------------------------------------------===// - -// An immediate in the LHS of a subtract can't be encoded in the instruction. -// If there is no possibility of a borrow we can use an XOR instead of a SUB -// to enable the immediate to be folded. -// TODO: Move this to a DAG combine? - -def sub_is_xor : PatFrag<(ops node:$lhs, node:$rhs), (sub node:$lhs, node:$rhs),[{ - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - KnownBits Known = CurDAG->computeKnownBits(N->getOperand(1)); - - // If all possible ones in the RHS are set in the LHS then there can't be - // a borrow and we can use xor. 
- return (~Known.Zero).isSubsetOf(CN->getAPIntValue()); - } - - return false; -}]>; - -let AddedComplexity = 5 in { -def : Pat<(sub_is_xor imm:$src2, GR8:$src1), - (XOR8ri GR8:$src1, imm:$src2)>; -def : Pat<(sub_is_xor i16immSExt8:$src2, GR16:$src1), - (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>; -def : Pat<(sub_is_xor imm:$src2, GR16:$src1), - (XOR16ri GR16:$src1, imm:$src2)>; -def : Pat<(sub_is_xor i32immSExt8:$src2, GR32:$src1), - (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; -def : Pat<(sub_is_xor imm:$src2, GR32:$src1), - (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(sub_is_xor i64immSExt8:$src2, GR64:$src1), - (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>; -def : Pat<(sub_is_xor i64immSExt32:$src2, GR64:$src1), - (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; -} - //===----------------------------------------------------------------------===// // Some peepholes //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/sub1.ll b/llvm/test/CodeGen/AArch64/sub1.ll --- a/llvm/test/CodeGen/AArch64/sub1.ll +++ b/llvm/test/CodeGen/AArch64/sub1.ll @@ -18,9 +18,8 @@ ; CHECK-LABEL: masked_sub_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: mov w9, #7 ; CHECK-NEXT: and w8, w0, w8 -; CHECK-NEXT: sub w0, w9, w8 +; CHECK-NEXT: eor w0, w8, #0x7 ; CHECK-NEXT: ret %a = and i8 %x, 5 %m = sub i8 7, %a @@ -43,9 +42,8 @@ ; CHECK-LABEL: masked_sub_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: mov w9, #31 ; CHECK-NEXT: and w8, w0, w8 -; CHECK-NEXT: sub w0, w9, w8 +; CHECK-NEXT: eor w0, w8, #0x1f ; CHECK-NEXT: ret %a = and i32 %x, 9 %m = sub i32 31, %a @@ -58,7 +56,7 @@ ; CHECK-NEXT: movi v1.4s, #42 ; CHECK-NEXT: movi v2.4s, #1, msl #8 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %a = and <4 x i32> %x, %m = sub <4 x i32> , %a diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll 
b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll --- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -248,38 +248,40 @@ ret void } +; this could have the offset transform, but sub became xor + define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_alt() #1 { ; CI-LABEL: add_x_shl_neg_to_sub_max_offset_alt: ; CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 +; CI-NEXT: v_xor_b32_e32 v0, 0xffff, v0 ; CI-NEXT: v_mov_b32_e32 v1, 13 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_write_b8 v0, v1 offset:65535 +; CI-NEXT: ds_write_b8 v0, v1 ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_alt: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 13 -; GFX9-NEXT: ds_write_b8 v0, v1 offset:65535 +; GFX9-NEXT: ds_write_b8 v0, v1 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_alt: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 13 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, 0, v0 -; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535 +; GFX10-NEXT: v_xor_b32_e32 v0, 0xffff, v0 +; GFX10-NEXT: ds_write_b8 v0, v1 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_alt: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0 -; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535 +; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: ds_store_b8 v0, v1 ; GFX11-NEXT: s_endpgm %x.i = tail call i32 @llvm.amdgcn.workitem.id.x() %.neg = mul i32 %x.i, -4 @@ -290,38 +292,40 @@ ret void } +; this could have the offset transform, but sub became xor + define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_not_canonical() #1 { ; CI-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; 
CI: ; %bb.0: ; CI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CI-NEXT: v_sub_i32_e32 v0, vcc, 0, v0 +; CI-NEXT: v_xor_b32_e32 v0, 0xffff, v0 ; CI-NEXT: v_mov_b32_e32 v1, 13 ; CI-NEXT: s_mov_b32 m0, -1 -; CI-NEXT: ds_write_b8 v0, v1 offset:65535 +; CI-NEXT: ds_write_b8 v0, v1 ; CI-NEXT: s_endpgm ; ; GFX9-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 +; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, 13 -; GFX9-NEXT: ds_write_b8 v0, v1 offset:65535 +; GFX9-NEXT: ds_write_b8 v0, v1 ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, 13 -; GFX10-NEXT: v_sub_nc_u32_e32 v0, 0, v0 -; GFX10-NEXT: ds_write_b8 v0, v1 offset:65535 +; GFX10-NEXT: v_xor_b32_e32 v0, 0xffff, v0 +; GFX10-NEXT: ds_write_b8 v0, v1 ; GFX10-NEXT: s_endpgm ; ; GFX11-LABEL: add_x_shl_neg_to_sub_max_offset_not_canonical: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_dual_mov_b32 v1, 13 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0, v0 -; GFX11-NEXT: ds_store_b8 v0, v1 offset:65535 +; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff, v0 +; GFX11-NEXT: ds_store_b8 v0, v1 ; GFX11-NEXT: s_endpgm %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0 %neg = sub i32 0, %x.i diff --git a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll --- a/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll +++ b/llvm/test/CodeGen/AMDGPU/setcc-multiple-use.ll @@ -15,10 +15,10 @@ ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: ds_read_b32 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; CHECK-NEXT: s_cmpk_lg_u32 vcc_lo, 0x0 -; CHECK-NEXT: s_subb_u32 s4, 1, 0 -; 
CHECK-NEXT: v_cndmask_b32_e64 v0, 0, s4, vcc_lo +; CHECK-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc_lo ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: %i = load i32, i32 addrspace(3)* null, align 16 diff --git a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll --- a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll +++ b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll @@ -58,18 +58,18 @@ ; ARM: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] ; ARM: mov r[[R2:[0-9]+]], #0 ; ARM: adc r[[R0]], r[[R2]], #0 - ; ARM: rsb r[[R0]], r[[R0]], #1 + ; ARM: eor r[[R0]], r[[R0]], #1 ; THUMBV6: movs r[[R2:[0-9]+]], #0 ; THUMBV6: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] ; THUMBV6: adcs r[[R2]], r[[R2]] ; THUMBV6: movs r[[R0]], #1 - ; THUMBV6: subs r[[R0]], r[[R0]], r[[R2]] + ; THUMBV6: eors r[[R0]], r[[R2]] ; THUMBV7: subs r[[R0:[0-9]+]], r[[R0]], r[[R1:[0-9]+]] ; THUMBV7: mov.w r[[R2:[0-9]+]], #0 ; THUMBV7: adc r[[R0]], r[[R2]], #0 - ; THUMBV7: rsb.w r[[R0]], r[[R0]], #1 + ; THUMBV7: eor r[[R0]], r[[R0]], #1 ; We should know that the overflow is just 1 bit, ; no need to clear any other bit diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll --- a/llvm/test/CodeGen/ARM/usub_sat.ll +++ b/llvm/test/CodeGen/ARM/usub_sat.ll @@ -48,7 +48,7 @@ ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: adcs r0, r1 ; CHECK-T1-NEXT: movs r3, #1 -; CHECK-T1-NEXT: subs r3, r3, r0 +; CHECK-T1-NEXT: eors r3, r0 ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: beq .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: @@ -70,7 +70,7 @@ ; CHECK-T2-NEXT: mov.w r12, #0 ; CHECK-T2-NEXT: sbcs r1, r3 ; CHECK-T2-NEXT: adc r2, r12, #0 -; CHECK-T2-NEXT: rsbs.w r2, r2, #1 +; CHECK-T2-NEXT: eors r2, r2, #1 ; CHECK-T2-NEXT: itt ne ; CHECK-T2-NEXT: movne r0, #0 ; CHECK-T2-NEXT: movne r1, #0 @@ -82,7 +82,7 @@ ; CHECK-ARM-NEXT: mov r12, #0 ; CHECK-ARM-NEXT: sbcs r1, r1, r3 ; CHECK-ARM-NEXT: adc r2, r12, #0 -; CHECK-ARM-NEXT: rsbs r2, r2, #1 +; CHECK-ARM-NEXT: eors r2, r2, #1 ; CHECK-ARM-NEXT: 
movwne r0, #0 ; CHECK-ARM-NEXT: movwne r1, #0 ; CHECK-ARM-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll --- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll @@ -54,7 +54,7 @@ ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: adcs r0, r1 ; CHECK-T1-NEXT: movs r4, #1 -; CHECK-T1-NEXT: subs r4, r4, r0 +; CHECK-T1-NEXT: eors r4, r0 ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: beq .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: @@ -77,7 +77,7 @@ ; CHECK-T2-NEXT: subs r0, r0, r2 ; CHECK-T2-NEXT: sbcs r1, r3 ; CHECK-T2-NEXT: adc r2, r12, #0 -; CHECK-T2-NEXT: rsbs.w r2, r2, #1 +; CHECK-T2-NEXT: eors r2, r2, #1 ; CHECK-T2-NEXT: itt ne ; CHECK-T2-NEXT: movne r0, #0 ; CHECK-T2-NEXT: movne r1, #0 @@ -91,7 +91,7 @@ ; CHECK-ARM-NEXT: subs r0, r0, r2 ; CHECK-ARM-NEXT: sbcs r1, r1, r3 ; CHECK-ARM-NEXT: adc r2, r12, #0 -; CHECK-ARM-NEXT: rsbs r2, r2, #1 +; CHECK-ARM-NEXT: eors r2, r2, #1 ; CHECK-ARM-NEXT: movwne r0, #0 ; CHECK-ARM-NEXT: movwne r1, #0 ; CHECK-ARM-NEXT: bx lr diff --git a/llvm/test/CodeGen/PowerPC/bool-math.ll b/llvm/test/CodeGen/PowerPC/bool-math.ll --- a/llvm/test/CodeGen/PowerPC/bool-math.ll +++ b/llvm/test/CodeGen/PowerPC/bool-math.ll @@ -45,8 +45,8 @@ define i8 @add_zext_cmp_mask_same_size_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: clrlwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 27 +; CHECK-NEXT: clrldi 3, 3, 63 +; CHECK-NEXT: xori 3, 3, 27 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -58,8 +58,8 @@ define i32 @add_zext_cmp_mask_wider_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: clrlwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 27 +; CHECK-NEXT: clrldi 3, 3, 63 +; CHECK-NEXT: xori 3, 3, 27 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -71,8 +71,8 @@ define i8 @add_zext_cmp_mask_narrower_result(i32 %x) { ; CHECK-LABEL: add_zext_cmp_mask_narrower_result: ; 
CHECK: # %bb.0: -; CHECK-NEXT: clrlwi 3, 3, 31 -; CHECK-NEXT: subfic 3, 3, 43 +; CHECK-NEXT: clrldi 3, 3, 63 +; CHECK-NEXT: xori 3, 3, 43 ; CHECK-NEXT: blr %a = and i32 %x, 1 %c = icmp eq i32 %a, 0 @@ -120,8 +120,9 @@ define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) { ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result: ; CHECK: # %bb.0: +; CHECK-NEXT: li 4, -29 ; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: subfic 3, 3, -29 +; CHECK-NEXT: xor 3, 3, 4 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -133,7 +134,7 @@ ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result: ; CHECK: # %bb.0: ; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: subfic 3, 3, 227 +; CHECK-NEXT: xori 3, 3, 227 ; CHECK-NEXT: blr %a = and i8 %x, 1 %c = icmp eq i8 %a, 0 @@ -145,7 +146,7 @@ ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result: ; CHECK: # %bb.0: ; CHECK-NEXT: clrldi 3, 3, 63 -; CHECK-NEXT: subfic 3, 3, 41 +; CHECK-NEXT: xori 3, 3, 41 ; CHECK-NEXT: blr %a = and i16 %x, 1 %c = icmp eq i16 %a, 0 diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll --- a/llvm/test/CodeGen/PowerPC/select_const.ll +++ b/llvm/test/CodeGen/PowerPC/select_const.ll @@ -495,7 +495,7 @@ ; ALL-LABEL: sel_constants_urem_constant: ; ALL: # %bb.0: ; ALL-NEXT: clrldi 3, 3, 63 -; ALL-NEXT: subfic 3, 3, 3 +; ALL-NEXT: xori 3, 3, 3 ; ALL-NEXT: blr %sel = select i1 %cond, i8 -4, i8 23 %bo = urem i8 %sel, 5 @@ -530,7 +530,7 @@ ; ALL-LABEL: sel_constants_and_constant: ; ALL: # %bb.0: ; ALL-NEXT: clrldi 3, 3, 63 -; ALL-NEXT: subfic 3, 3, 5 +; ALL-NEXT: xori 3, 3, 5 ; ALL-NEXT: blr %sel = select i1 %cond, i8 -4, i8 23 %bo = and i8 %sel, 5 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -2051,8 +2051,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; 
RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2123,8 +2122,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB35_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2199,8 +2197,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2271,8 +2268,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB36_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2347,8 +2343,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2419,8 +2414,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB37_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2495,8 +2489,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 
24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2567,8 +2560,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB38_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2643,8 +2635,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2715,8 +2706,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB39_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2791,8 +2781,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -2863,8 +2852,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB40_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -2939,8 +2927,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; 
RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -3011,8 +2998,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB41_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -3087,8 +3073,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -3159,8 +3144,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB42_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -3235,8 +3219,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aq a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -3307,8 +3290,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB43_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aq a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -3383,8 +3365,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: 
.LBB44_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w.aqrl a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -3455,8 +3436,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB44_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w.aqrl a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -628,8 +628,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -703,8 +702,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 @@ -782,8 +780,7 @@ ; RV32IA-NEXT: slli a1, a1, 24 ; RV32IA-NEXT: srai a1, a1, 24 ; RV32IA-NEXT: sll a1, a1, a0 -; RV32IA-NEXT: li a5, 24 -; RV32IA-NEXT: sub a3, a5, a3 +; RV32IA-NEXT: xori a3, a3, 24 ; RV32IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; RV32IA-NEXT: lr.w a5, (a2) ; RV32IA-NEXT: and a7, a5, a4 @@ -857,8 +854,7 @@ ; RV64IA-NEXT: slli a1, a1, 56 ; RV64IA-NEXT: srai a1, a1, 56 ; RV64IA-NEXT: sllw a1, a1, a0 -; RV64IA-NEXT: li a5, 56 -; RV64IA-NEXT: sub a3, a5, a3 +; RV64IA-NEXT: xori a3, a3, 56 ; RV64IA-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 ; RV64IA-NEXT: lr.w a5, (a2) ; RV64IA-NEXT: and a7, a5, a4 diff --git a/llvm/test/CodeGen/SPARC/64bit.ll 
b/llvm/test/CodeGen/SPARC/64bit.ll --- a/llvm/test/CodeGen/SPARC/64bit.ll +++ b/llvm/test/CodeGen/SPARC/64bit.ll @@ -238,8 +238,8 @@ declare void @g(i8*) ; CHECK: expand_setcc -; CHECK: cmp %i0, 1 -; CHECK: movl %xcc, 1, +; CHECK: cmp %i0, 0 +; CHECK: movg %xcc, 1, define i32 @expand_setcc(i64 %a) { %cond = icmp sle i64 %a, 0 %cast2 = zext i1 %cond to i32