diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -333,7 +333,8 @@ SDValue visitTokenFactor(SDNode *N); SDValue visitMERGE_VALUES(SDNode *N); SDValue visitADD(SDNode *N); - SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference); + SDValue visitADDLike(SDNode *N); + SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitSUB(SDNode *N); SDValue visitADDSAT(SDNode *N); SDValue visitSUBSAT(SDNode *N); @@ -2102,7 +2103,10 @@ return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); } -SDValue DAGCombiner::visitADD(SDNode *N) { +/// Try to fold a node that behaves like an ADD (note that N isn't necessarily +/// an ISD::ADD here, it could for example be an ISD::OR if we know that there +/// are no common bits set in the operands). +SDValue DAGCombiner::visitADDLike(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); @@ -2255,20 +2259,9 @@ N0.getOperand(1)); } - if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) - return V; - - if (SDValue V = foldAddSubOfSignBit(N, DAG)) - return V; - if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - // fold (a+b) -> (a|b) iff a and b share no bits. 
- if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && - DAG.haveNoCommonBitsSet(N0, N1)) - return DAG.getNode(ISD::OR, DL, VT, N0, N1); - if (isOneOrOneSplat(N1)) { // fold (add (xor a, -1), 1) -> (sub 0, a) if (isBitwiseNot(N0)) @@ -2294,15 +2287,38 @@ } } - if (SDValue Combined = visitADDLike(N0, N1, N)) + if (SDValue Combined = visitADDLikeCommutative(N0, N1, N)) return Combined; - if (SDValue Combined = visitADDLike(N1, N0, N)) + if (SDValue Combined = visitADDLikeCommutative(N1, N0, N)) return Combined; return SDValue(); } +SDValue DAGCombiner::visitADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + if (SDValue Combined = visitADDLike(N)) + return Combined; + + if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG)) + return V; + + if (SDValue V = foldAddSubOfSignBit(N, DAG)) + return V; + + // fold (a+b) -> (a|b) iff a and b share no bits. + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); + + return SDValue(); +} + SDValue DAGCombiner::visitADDSAT(SDNode *N) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); @@ -2405,7 +2421,9 @@ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0)); } -SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { +/// Helper for doing combines based on N0 and N1 being added to each other. +SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, + SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); @@ -5529,6 +5547,12 @@ if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + // If OR can be rewritten into ADD, try combines based on ADD. 
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && + DAG.haveNoCommonBitsSet(N0, N1)) + if (SDValue Combined = visitADDLike(N)) + return Combined; + return SDValue(); } diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll --- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll +++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll @@ -171,9 +171,9 @@ ; SI: v_lshlrev_b32_e32 v1, 16, v1 ; SI: v_add_i32_e32 v0, vcc, 1, v0 -; SI: v_add_i32_e32 v1, vcc, 0x10000, v1 ; SI: v_and_b32 ; SI: v_or_b32 +; SI: v_add_i32_e32 v0, vcc, 0x10000, v0 define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) { %add = add <2 x i16> %arg0, store <2 x i16> %add, <2 x i16> addrspace(1)* undef @@ -183,16 +183,16 @@ ; GCN-LABEL: {{^}}ps_mesa_inreg_v2i16: ; VI: s_and_b32 s1, s0, 0xffff0000 ; VI: s_add_i32 s0, s0, 1 -; VI: s_add_i32 s1, s1, 0x10000 ; VI: s_and_b32 s0, s0, 0xffff ; VI: s_or_b32 s0, s0, s1 +; VI: s_add_i32 s0, s0, 0x10000 ; VI: v_mov_b32_e32 v0, s0 ; SI: s_lshl_b32 s1, s1, 16 ; SI: s_add_i32 s0, s0, 1 -; SI: s_add_i32 s1, s1, 0x10000 ; SI: s_and_b32 s0, s0, 0xffff ; SI: s_or_b32 s0, s0, s1 +; SI: s_add_i32 s0, s0, 0x10000 define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) { %add = add <2 x i16> %arg0, store <2 x i16> %add, <2 x i16> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll --- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -14,9 +14,9 @@ ; CIVI: s_max_i32 ; CIVI: s_max_i32 ; CIVI: s_add_i32 -; CIVI: s_add_i32 -; CIVI: s_and_b32 -; CIVI: s_or_b32 +; CIVI-DAG: s_add_i32 +; CIVI-DAG: s_and_b32 +; CIVI-DAG: s_or_b32 define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 { %neg = sub <2 x i16> zeroinitializer, %val %cond = icmp sgt <2 x i16> %val, %neg @@ -45,14 +45,14 @@ ; CI: buffer_load_dword v ; CI: v_lshrrev_b32_e32 -; CI: v_sub_i32_e32 -; 
CI: v_bfe_i32 -; CI: v_bfe_i32 -; CI: v_max_i32 -; CI: v_max_i32 -; CI: v_add_i32 -; CI: v_add_i32 -; CI: v_or_b32 +; CI-DAG: v_sub_i32_e32 +; CI-DAG: v_bfe_i32 +; CI-DAG: v_bfe_i32 +; CI-DAG: v_max_i32 +; CI-DAG: v_max_i32 +; CI-DAG: v_add_i32 +; CI-DAG: v_add_i32 +; CI-DAG: v_or_b32 define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep.in = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %src, i32 %tid diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll --- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll +++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll @@ -213,12 +213,11 @@ ; SI-NEXT: s_load_dword s0, s[0:1], 0x0 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_and_b32 s1, s0, 0xff00 -; SI-NEXT: s_and_b32 s0, s0, 0xffff ; SI-NEXT: s_add_i32 s0, s0, 12 ; SI-NEXT: s_or_b32 s0, s0, 4 -; SI-NEXT: s_addk_i32 s1, 0x2c00 ; SI-NEXT: s_and_b32 s0, s0, 0xff ; SI-NEXT: s_or_b32 s0, s0, s1 +; SI-NEXT: s_addk_i32 s0, 0x2c00 ; SI-NEXT: s_or_b32 s0, s0, 0x300 ; SI-NEXT: v_mov_b32_e32 v0, s0 ; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 diff --git a/llvm/test/CodeGen/Hexagon/subi-asl.ll b/llvm/test/CodeGen/Hexagon/subi-asl.ll --- a/llvm/test/CodeGen/Hexagon/subi-asl.ll +++ b/llvm/test/CodeGen/Hexagon/subi-asl.ll @@ -3,7 +3,10 @@ ; Check if S4_subi_asl_ri is being generated correctly. ; CHECK-LABEL: yes_sub_asl -; CHECK: [[REG1:(r[0-9]+)]] = sub(#0,asl([[REG1]],#1)) +; FIXME: We no longer get subi_asl here. 
+; XCHECK: [[REG1:(r[0-9]+)]] = sub(#0,asl([[REG1]],#1)) +; CHECK: [[REG1:(r[0-9]+)]] = asl([[REG1]],#1) +; CHECK: = sub(#0,[[REG1]]) ; CHECK-LABEL: no_sub_asl ; CHECK: [[REG2:(r[0-9]+)]] = asl(r{{[0-9]+}},#1) diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -17,131 +17,133 @@ ; ILP-NEXT: movq %rdi, %rax ; ILP-NEXT: xorl %r8d, %r8d ; ILP-NEXT: addq %rsi, %rsi -; ILP-NEXT: addb $2, %sil -; ILP-NEXT: orb $1, %sil -; ILP-NEXT: movl $1, %r10d +; ILP-NEXT: leal 3(%rsi), %r9d +; ILP-NEXT: movl $1, %r11d ; ILP-NEXT: xorl %r14d, %r14d -; ILP-NEXT: movl %esi, %ecx -; ILP-NEXT: shldq %cl, %r10, %r14 +; ILP-NEXT: movl %r9d, %ecx +; ILP-NEXT: shldq %cl, %r11, %r14 ; ILP-NEXT: movl $1, %edx ; ILP-NEXT: shlq %cl, %rdx -; ILP-NEXT: movb $-128, %r11b -; ILP-NEXT: subb %sil, %r11b -; ILP-NEXT: leal -128(%rsi), %r9d -; ILP-NEXT: xorl %edi, %edi -; ILP-NEXT: movl %r9d, %ecx -; ILP-NEXT: shldq %cl, %r10, %rdi -; ILP-NEXT: movl $1, %ebx -; ILP-NEXT: shlq %cl, %rbx -; ILP-NEXT: movl %r11d, %ecx -; ILP-NEXT: shrdq %cl, %r8, %r10 -; ILP-NEXT: testb $64, %sil +; ILP-NEXT: leal -125(%rsi), %r10d +; ILP-NEXT: xorl %ebx, %ebx +; ILP-NEXT: movl %r10d, %ecx +; ILP-NEXT: shldq %cl, %r11, %rbx +; ILP-NEXT: testb $64, %r9b ; ILP-NEXT: cmovneq %rdx, %r14 ; ILP-NEXT: cmovneq %r8, %rdx -; ILP-NEXT: testb $64, %r11b -; ILP-NEXT: cmovneq %r8, %r10 -; ILP-NEXT: testb $64, %r9b -; ILP-NEXT: cmovneq %rbx, %rdi -; ILP-NEXT: cmovneq %r8, %rbx -; ILP-NEXT: testb %sil, %sil +; ILP-NEXT: movl $1, %edi +; ILP-NEXT: shlq %cl, %rdi +; ILP-NEXT: movb $125, %cl +; ILP-NEXT: subb %sil, %cl +; ILP-NEXT: shrdq %cl, %r8, %r11 +; ILP-NEXT: testb $64, %cl +; ILP-NEXT: cmovneq %r8, %r11 +; ILP-NEXT: testb $64, %r10b +; ILP-NEXT: cmovneq %rdi, %rbx +; ILP-NEXT: cmovneq %r8, %rdi +; ILP-NEXT: testb %r9b, %r9b ; ILP-NEXT: cmovsq %r8, %r14 ; ILP-NEXT: 
cmovsq %r8, %rdx ; ILP-NEXT: movq %r14, 8(%rax) ; ILP-NEXT: movq %rdx, (%rax) -; ILP-NEXT: cmovnsq %r8, %rdi -; ILP-NEXT: cmoveq %r8, %rdi -; ILP-NEXT: movq %rdi, 24(%rax) -; ILP-NEXT: cmovnsq %r10, %rbx +; ILP-NEXT: cmovnsq %r8, %rbx ; ILP-NEXT: cmoveq %r8, %rbx -; ILP-NEXT: movq %rbx, 16(%rax) +; ILP-NEXT: movq %rbx, 24(%rax) +; ILP-NEXT: cmovnsq %r11, %rdi +; ILP-NEXT: cmoveq %r8, %rdi +; ILP-NEXT: movq %rdi, 16(%rax) ; ILP-NEXT: popq %rbx ; ILP-NEXT: popq %r14 ; ILP-NEXT: retq ; ; HYBRID-LABEL: test1: ; HYBRID: # %bb.0: +; HYBRID-NEXT: pushq %rbx ; HYBRID-NEXT: movq %rdi, %rax ; HYBRID-NEXT: addq %rsi, %rsi -; HYBRID-NEXT: addb $2, %sil -; HYBRID-NEXT: orb $1, %sil -; HYBRID-NEXT: movb $-128, %cl +; HYBRID-NEXT: movb $125, %cl ; HYBRID-NEXT: subb %sil, %cl ; HYBRID-NEXT: xorl %r8d, %r8d -; HYBRID-NEXT: movl $1, %r11d +; HYBRID-NEXT: movl $1, %edi ; HYBRID-NEXT: movl $1, %r9d ; HYBRID-NEXT: shrdq %cl, %r8, %r9 ; HYBRID-NEXT: testb $64, %cl ; HYBRID-NEXT: cmovneq %r8, %r9 -; HYBRID-NEXT: xorl %r10d, %r10d +; HYBRID-NEXT: leal 3(%rsi), %r10d +; HYBRID-NEXT: xorl %r11d, %r11d +; HYBRID-NEXT: movl %r10d, %ecx +; HYBRID-NEXT: shldq %cl, %rdi, %r11 +; HYBRID-NEXT: addb $-125, %sil +; HYBRID-NEXT: xorl %edx, %edx ; HYBRID-NEXT: movl %esi, %ecx -; HYBRID-NEXT: shldq %cl, %r11, %r10 -; HYBRID-NEXT: leal -128(%rsi), %ecx -; HYBRID-NEXT: xorl %edi, %edi -; HYBRID-NEXT: shldq %cl, %r11, %rdi -; HYBRID-NEXT: movl $1, %edx -; HYBRID-NEXT: shlq %cl, %rdx -; HYBRID-NEXT: testb $64, %cl -; HYBRID-NEXT: cmovneq %rdx, %rdi -; HYBRID-NEXT: cmovneq %r8, %rdx -; HYBRID-NEXT: movl %esi, %ecx -; HYBRID-NEXT: shlq %cl, %r11 +; HYBRID-NEXT: shldq %cl, %rdi, %rdx +; HYBRID-NEXT: movl $1, %ebx +; HYBRID-NEXT: shlq %cl, %rbx ; HYBRID-NEXT: testb $64, %sil -; HYBRID-NEXT: cmovneq %r11, %r10 -; HYBRID-NEXT: cmovneq %r8, %r11 -; HYBRID-NEXT: testb %sil, %sil -; HYBRID-NEXT: cmovsq %r8, %r10 -; HYBRID-NEXT: movq %r10, 8(%rax) +; HYBRID-NEXT: cmovneq %rbx, %rdx +; HYBRID-NEXT: cmovneq %r8, %rbx 
+; HYBRID-NEXT: movl %r10d, %ecx +; HYBRID-NEXT: shlq %cl, %rdi +; HYBRID-NEXT: testb $64, %r10b +; HYBRID-NEXT: cmovneq %rdi, %r11 +; HYBRID-NEXT: cmovneq %r8, %rdi +; HYBRID-NEXT: testb %r10b, %r10b ; HYBRID-NEXT: cmovsq %r8, %r11 -; HYBRID-NEXT: movq %r11, (%rax) -; HYBRID-NEXT: cmovnsq %r8, %rdi -; HYBRID-NEXT: cmoveq %r8, %rdi -; HYBRID-NEXT: movq %rdi, 24(%rax) -; HYBRID-NEXT: cmovnsq %r9, %rdx +; HYBRID-NEXT: movq %r11, 8(%rax) +; HYBRID-NEXT: cmovsq %r8, %rdi +; HYBRID-NEXT: movq %rdi, (%rax) +; HYBRID-NEXT: cmovnsq %r8, %rdx ; HYBRID-NEXT: cmoveq %r8, %rdx -; HYBRID-NEXT: movq %rdx, 16(%rax) +; HYBRID-NEXT: movq %rdx, 24(%rax) +; HYBRID-NEXT: cmovnsq %r9, %rbx +; HYBRID-NEXT: cmoveq %r8, %rbx +; HYBRID-NEXT: movq %rbx, 16(%rax) +; HYBRID-NEXT: popq %rbx ; HYBRID-NEXT: retq ; ; BURR-LABEL: test1: ; BURR: # %bb.0: +; BURR-NEXT: pushq %rbx ; BURR-NEXT: movq %rdi, %rax ; BURR-NEXT: addq %rsi, %rsi -; BURR-NEXT: addb $2, %sil -; BURR-NEXT: orb $1, %sil -; BURR-NEXT: movb $-128, %cl +; BURR-NEXT: movb $125, %cl ; BURR-NEXT: subb %sil, %cl ; BURR-NEXT: xorl %r8d, %r8d -; BURR-NEXT: movl $1, %r11d +; BURR-NEXT: movl $1, %edi ; BURR-NEXT: movl $1, %r9d ; BURR-NEXT: shrdq %cl, %r8, %r9 ; BURR-NEXT: testb $64, %cl ; BURR-NEXT: cmovneq %r8, %r9 -; BURR-NEXT: xorl %r10d, %r10d -; BURR-NEXT: movl %esi, %ecx -; BURR-NEXT: shldq %cl, %r11, %r10 -; BURR-NEXT: leal -128(%rsi), %ecx -; BURR-NEXT: xorl %edi, %edi -; BURR-NEXT: shldq %cl, %r11, %rdi -; BURR-NEXT: movl $1, %edx -; BURR-NEXT: shlq %cl, %rdx -; BURR-NEXT: testb $64, %cl -; BURR-NEXT: cmovneq %rdx, %rdi -; BURR-NEXT: cmovneq %r8, %rdx +; BURR-NEXT: leal 3(%rsi), %r10d +; BURR-NEXT: xorl %r11d, %r11d +; BURR-NEXT: movl %r10d, %ecx +; BURR-NEXT: shldq %cl, %rdi, %r11 +; BURR-NEXT: addb $-125, %sil +; BURR-NEXT: xorl %edx, %edx ; BURR-NEXT: movl %esi, %ecx -; BURR-NEXT: shlq %cl, %r11 +; BURR-NEXT: shldq %cl, %rdi, %rdx +; BURR-NEXT: movl $1, %ebx +; BURR-NEXT: shlq %cl, %rbx ; BURR-NEXT: testb $64, %sil -; 
BURR-NEXT: cmovneq %r11, %r10 -; BURR-NEXT: cmovneq %r8, %r11 -; BURR-NEXT: testb %sil, %sil -; BURR-NEXT: cmovsq %r8, %r10 -; BURR-NEXT: movq %r10, 8(%rax) +; BURR-NEXT: cmovneq %rbx, %rdx +; BURR-NEXT: cmovneq %r8, %rbx +; BURR-NEXT: movl %r10d, %ecx +; BURR-NEXT: shlq %cl, %rdi +; BURR-NEXT: testb $64, %r10b +; BURR-NEXT: cmovneq %rdi, %r11 +; BURR-NEXT: cmovneq %r8, %rdi +; BURR-NEXT: testb %r10b, %r10b ; BURR-NEXT: cmovsq %r8, %r11 -; BURR-NEXT: movq %r11, (%rax) -; BURR-NEXT: cmovnsq %r8, %rdi -; BURR-NEXT: cmoveq %r8, %rdi -; BURR-NEXT: movq %rdi, 24(%rax) -; BURR-NEXT: cmovnsq %r9, %rdx +; BURR-NEXT: movq %r11, 8(%rax) +; BURR-NEXT: cmovsq %r8, %rdi +; BURR-NEXT: movq %rdi, (%rax) +; BURR-NEXT: cmovnsq %r8, %rdx ; BURR-NEXT: cmoveq %r8, %rdx -; BURR-NEXT: movq %rdx, 16(%rax) +; BURR-NEXT: movq %rdx, 24(%rax) +; BURR-NEXT: cmovnsq %r9, %rbx +; BURR-NEXT: cmoveq %r8, %rbx +; BURR-NEXT: movq %rbx, 16(%rax) +; BURR-NEXT: popq %rbx ; BURR-NEXT: retq ; ; SRC-LABEL: test1: @@ -149,9 +151,8 @@ ; SRC-NEXT: pushq %rbx ; SRC-NEXT: movq %rdi, %rax ; SRC-NEXT: addq %rsi, %rsi -; SRC-NEXT: addb $2, %sil -; SRC-NEXT: orb $1, %sil -; SRC-NEXT: movb $-128, %cl +; SRC-NEXT: leal 3(%rsi), %r9d +; SRC-NEXT: movb $125, %cl ; SRC-NEXT: subb %sil, %cl ; SRC-NEXT: xorl %r8d, %r8d ; SRC-NEXT: movl $1, %edi @@ -159,24 +160,24 @@ ; SRC-NEXT: shrdq %cl, %r8, %r10 ; SRC-NEXT: testb $64, %cl ; SRC-NEXT: cmovneq %r8, %r10 -; SRC-NEXT: leal -128(%rsi), %r9d +; SRC-NEXT: addb $-125, %sil ; SRC-NEXT: xorl %edx, %edx -; SRC-NEXT: movl %r9d, %ecx +; SRC-NEXT: movl %esi, %ecx ; SRC-NEXT: shldq %cl, %rdi, %rdx ; SRC-NEXT: xorl %r11d, %r11d -; SRC-NEXT: movl %esi, %ecx +; SRC-NEXT: movl %r9d, %ecx ; SRC-NEXT: shldq %cl, %rdi, %r11 ; SRC-NEXT: movl $1, %ebx ; SRC-NEXT: shlq %cl, %rbx -; SRC-NEXT: testb $64, %sil +; SRC-NEXT: testb $64, %r9b ; SRC-NEXT: cmovneq %rbx, %r11 ; SRC-NEXT: cmovneq %r8, %rbx -; SRC-NEXT: movl %r9d, %ecx +; SRC-NEXT: movl %esi, %ecx ; SRC-NEXT: shlq %cl, %rdi -; SRC-NEXT: 
testb $64, %r9b +; SRC-NEXT: testb $64, %sil ; SRC-NEXT: cmovneq %rdi, %rdx ; SRC-NEXT: cmovneq %r8, %rdi -; SRC-NEXT: testb %sil, %sil +; SRC-NEXT: testb %r9b, %r9b ; SRC-NEXT: cmovnsq %r10, %rdi ; SRC-NEXT: cmoveq %r8, %rdi ; SRC-NEXT: cmovnsq %r8, %rdx @@ -196,31 +197,29 @@ ; LIN-NEXT: xorl %r9d, %r9d ; LIN-NEXT: movl $1, %r8d ; LIN-NEXT: addq %rsi, %rsi -; LIN-NEXT: addb $2, %sil -; LIN-NEXT: orb $1, %sil -; LIN-NEXT: movl $1, %edx -; LIN-NEXT: movl %esi, %ecx -; LIN-NEXT: shlq %cl, %rdx -; LIN-NEXT: testb $64, %sil -; LIN-NEXT: movq %rdx, %rcx -; LIN-NEXT: cmovneq %r9, %rcx -; LIN-NEXT: testb %sil, %sil -; LIN-NEXT: cmovsq %r9, %rcx -; LIN-NEXT: movq %rcx, (%rdi) -; LIN-NEXT: xorl %edi, %edi -; LIN-NEXT: movl %esi, %ecx -; LIN-NEXT: shldq %cl, %r8, %rdi -; LIN-NEXT: cmovneq %rdx, %rdi -; LIN-NEXT: cmovsq %r9, %rdi -; LIN-NEXT: movq %rdi, 8(%rax) -; LIN-NEXT: leal -128(%rsi), %r10d +; LIN-NEXT: leal 3(%rsi), %ecx +; LIN-NEXT: movl $1, %edi +; LIN-NEXT: shlq %cl, %rdi +; LIN-NEXT: testb $64, %cl +; LIN-NEXT: movq %rdi, %rdx +; LIN-NEXT: cmovneq %r9, %rdx +; LIN-NEXT: testb %cl, %cl +; LIN-NEXT: cmovsq %r9, %rdx +; LIN-NEXT: movq %rdx, (%rax) +; LIN-NEXT: xorl %edx, %edx +; LIN-NEXT: # kill: def $cl killed $cl killed $ecx +; LIN-NEXT: shldq %cl, %r8, %rdx +; LIN-NEXT: cmovneq %rdi, %rdx +; LIN-NEXT: cmovsq %r9, %rdx +; LIN-NEXT: movq %rdx, 8(%rax) +; LIN-NEXT: leal -125(%rsi), %r10d ; LIN-NEXT: movl $1, %edx ; LIN-NEXT: movl %r10d, %ecx ; LIN-NEXT: shlq %cl, %rdx ; LIN-NEXT: testb $64, %r10b ; LIN-NEXT: movq %rdx, %rdi ; LIN-NEXT: cmovneq %r9, %rdi -; LIN-NEXT: movb $-128, %cl +; LIN-NEXT: movb $125, %cl ; LIN-NEXT: subb %sil, %cl ; LIN-NEXT: movl $1, %esi ; LIN-NEXT: shrdq %cl, %r9, %rsi diff --git a/llvm/test/CodeGen/X86/signbit-shift.ll b/llvm/test/CodeGen/X86/signbit-shift.ll --- a/llvm/test/CodeGen/X86/signbit-shift.ll +++ b/llvm/test/CodeGen/X86/signbit-shift.ll @@ -33,8 +33,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 ; CHECK-NEXT: pcmpgtd 
%xmm1, %xmm0 -; CHECK-NEXT: psrld $31, %xmm0 -; CHECK-NEXT: por {{.*}}(%rip), %xmm0 +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42] +; CHECK-NEXT: psubd %xmm0, %xmm1 +; CHECK-NEXT: movdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %c = icmp sgt <4 x i32> %x, %e = zext <4 x i1> %c to <4 x i32> diff --git a/llvm/test/CodeGen/X86/split-store.ll b/llvm/test/CodeGen/X86/split-store.ll --- a/llvm/test/CodeGen/X86/split-store.ll +++ b/llvm/test/CodeGen/X86/split-store.ll @@ -217,10 +217,9 @@ ; CHECK-LABEL: int1_int1_pair: ; CHECK: # %bb.0: ; CHECK-NEXT: addb %sil, %sil -; CHECK-NEXT: andb $1, %dil -; CHECK-NEXT: orb %sil, %dil -; CHECK-NEXT: andb $3, %dil -; CHECK-NEXT: movb %dil, (%rdx) +; CHECK-NEXT: subb %dil, %sil +; CHECK-NEXT: andb $3, %sil +; CHECK-NEXT: movb %sil, (%rdx) ; CHECK-NEXT: retq %t1 = zext i1 %tmp2 to i2 %t2 = shl nuw i2 %t1, 1