Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -341,7 +341,8 @@
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
-   SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+   SDValue visitADDLike(SDNode *N);
+   SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
@@ -2111,7 +2112,10 @@
  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}

-SDValue DAGCombiner::visitADD(SDNode *N) {
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
+/// are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
@@ -2264,20 +2268,9 @@
                       N0.getOperand(1));
  }

-  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
-    return V;
-
-  if (SDValue V = foldAddSubOfSignBit(N, DAG))
-    return V;
-
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

-  // fold (a+b) -> (a|b) iff a and b share no bits.
-  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
-      DAG.haveNoCommonBitsSet(N0, N1))
-    return DAG.getNode(ISD::OR, DL, VT, N0, N1);
-
  if (isOneOrOneSplat(N1)) {
    // fold (add (xor a, -1), 1) -> (sub 0, a)
    if (isBitwiseNot(N0))
@@ -2303,15 +2296,38 @@
    }
  }

-  if (SDValue Combined = visitADDLike(N0, N1, N))
+  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
    return Combined;

-  if (SDValue Combined = visitADDLike(N1, N0, N))
+  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
    return Combined;

  return SDValue();
}

+SDValue DAGCombiner::visitADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  SDLoc DL(N);
+
+  if (SDValue Combined = visitADDLike(N))
+    return Combined;
+
+  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+    return V;
+
+  if (SDValue V = foldAddSubOfSignBit(N, DAG))
+    return V;
+
+  // fold (a+b) -> (a|b) iff a and b share no bits.
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
+      DAG.haveNoCommonBitsSet(N0, N1))
+    return DAG.getNode(ISD::OR, DL, VT, N0, N1);
+
+  return SDValue();
+}
+
SDValue DAGCombiner::visitADDSAT(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
@@ -2414,7 +2430,9 @@
  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
}

-SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
+                                             SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

@@ -5546,6 +5564,12 @@
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

+  // If OR can be rewritten into ADD, try combines based on ADD.
+  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+      DAG.haveNoCommonBitsSet(N0, N1))
+    if (SDValue Combined = visitADDLike(N))
+      return Combined;
+
  return SDValue();
}

Index: llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -171,9 +171,9 @@

; SI: v_lshlrev_b32_e32 v1, 16, v1
; SI: v_add_i32_e32 v0, vcc, 1, v0
-; SI: v_add_i32_e32 v1, vcc, 0x10000, v1
; SI: v_and_b32
; SI: v_or_b32
+; SI: v_add_i32_e32 v0, vcc, 0x10000, v0
define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) {
  %add = add <2 x i16> %arg0, <i16 1, i16 1>
  store <2 x i16> %add, <2 x i16> addrspace(1)* undef
@@ -183,16 +183,16 @@
; GCN-LABEL: {{^}}ps_mesa_inreg_v2i16:
; VI: s_and_b32 s1, s0, 0xffff0000
; VI: s_add_i32 s0, s0, 1
-; VI: s_add_i32 s1, s1, 0x10000
; VI: s_and_b32 s0, s0, 0xffff
; VI: s_or_b32 s0, s0, s1
+; VI: s_add_i32 s0, s0, 0x10000
; VI: v_mov_b32_e32 v0, s0

; SI: s_lshl_b32 s1, s1, 16
; SI: s_add_i32 s0, s0, 1
-; SI: s_add_i32 s1, s1, 0x10000
; SI: s_and_b32 s0, s0, 0xffff
; SI: s_or_b32 s0, s0, s1
+; SI: s_add_i32 s0, s0, 0x10000
define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {
  %add = add <2 x i16> %arg0, <i16 1, i16 1>
  store <2 x i16> %add, <2 x i16> addrspace(1)* undef
Index: llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -276,34 +276,33 @@
; SI-NEXT: buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
-; SI-NEXT: s_movk_i32 s12, 0x900
+; SI-NEXT: s_movk_i32 s12, 0xff
; SI-NEXT: s_mov_b32 s10, s2
; SI-NEXT: s_mov_b32 s11, s3
-; SI-NEXT: s_movk_i32 s13, 0xff
+; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_lshrrev_b32_e32 v4, 16, v1
-; SI-NEXT: v_lshrrev_b32_e32 v5, 24, v1
-; SI-NEXT: v_and_b32_e32 v6, 0xff00, v1
; SI-NEXT: v_add_i32_e32 v7, vcc, 9, v1
+; SI-NEXT: v_and_b32_e32 v6, 0xff00, v1
+; SI-NEXT: v_lshrrev_b32_e32 v5, 24, v1
; SI-NEXT: v_cvt_f32_ubyte3_e32 v3, v1
; SI-NEXT: v_cvt_f32_ubyte2_e32 v2, v1
; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v1
; SI-NEXT: v_cvt_f32_ubyte1_e32 v1, v6
-; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
; SI-NEXT: v_add_i32_e32 v4, vcc, 9, v4
+; SI-NEXT: v_and_b32_e32 v7, s12, v7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
-; SI-NEXT: v_add_i32_e32 v6, vcc, s12, v6
-; SI-NEXT: v_and_b32_e32 v7, s13, v7
; SI-NEXT: s_waitcnt expcnt(0)
-; SI-NEXT: v_add_i32_e32 v1, vcc, s12, v5
-; SI-NEXT: v_and_b32_e32 v2, s13, v4
-; SI-NEXT: v_or_b32_e32 v0, v7, v6
-; SI-NEXT: v_or_b32_e32 v1, v2, v1
-; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; SI-NEXT: v_or_b32_e32 v0, v0, v1
+; SI-NEXT: v_or_b32_e32 v1, v7, v6
+; SI-NEXT: v_lshlrev_b32_e32 v5, 8, v5
+; SI-NEXT: v_and_b32_e32 v0, s12, v4
+; SI-NEXT: v_or_b32_e32 v0, v0, v5
+; SI-NEXT: v_add_i32_e32 v1, vcc, 0x900, v1
+; SI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; SI-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; SI-NEXT: v_or_b32_e32 v0, v1, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, 0x9000000, v0
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
@@ -313,7 +312,7 @@
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
-; VI-NEXT: s_movk_i32 s8, 0x900
+; VI-NEXT: v_mov_b32_e32 v4, 9
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
@@ -323,23 +322,24 @@
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s6, s2
; VI-NEXT: s_mov_b32 s7, s3
-; VI-NEXT: v_mov_b32_e32 v4, 9
+; VI-NEXT: s_movk_i32 s8, 0x900
+; VI-NEXT: v_mov_b32_e32 v6, s8
; VI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; VI-NEXT: v_lshrrev_b32_e32 v6, 24, v5
+; VI-NEXT: v_lshrrev_b32_e32 v7, 24, v5
; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v5
; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v5
; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v5
; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v5
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
-; VI-NEXT: v_and_b32_e32 v7, 0xffffff00, v5
-; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v6
-; VI-NEXT: v_add_u16_e32 v8, 9, v5
-; VI-NEXT: v_add_u16_e32 v0, s8, v7
-; VI-NEXT: v_add_u16_e32 v1, s8, v1
-; VI-NEXT: v_add_u16_sdwa v2, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_and_b32_e32 v8, 0xffffff00, v5
+; VI-NEXT: v_add_u16_e32 v9, 9, v5
+; VI-NEXT: v_add_u16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_lshlrev_b16_e32 v1, 8, v7
+; VI-NEXT: v_or_b32_sdwa v0, v9, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; VI-NEXT: v_add_u16_e32 v0, s8, v0
+; VI-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
  %tid.x = call i32 @llvm.amdgcn.workitem.id.x()
Index: llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -14,9 +14,9 @@
; CIVI: s_max_i32
; CIVI: s_max_i32
; CIVI: s_add_i32
-; CIVI: s_add_i32
-; CIVI: s_and_b32
-; CIVI: s_or_b32
+; CIVI-DAG: s_add_i32
+; CIVI-DAG: s_and_b32
+; CIVI-DAG: s_or_b32
define amdgpu_kernel void @s_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %val) #0 {
  %neg = sub <2 x i16> zeroinitializer, %val
  %cond = icmp sgt <2 x i16> %val, %neg
@@ -45,14 +45,14 @@

; CI: buffer_load_dword v
; CI: v_lshrrev_b32_e32
-; CI: v_sub_i32_e32
-; CI: v_bfe_i32
-; CI: v_bfe_i32
-; CI: v_max_i32
-; CI: v_max_i32
-; CI: v_add_i32
-; CI: v_add_i32
-; CI: v_or_b32
+; CI-DAG: v_sub_i32_e32
+; CI-DAG: v_bfe_i32
+; CI-DAG: v_bfe_i32
+; CI-DAG: v_max_i32
+; CI-DAG: v_max_i32
+; CI-DAG: v_add_i32
+; CI-DAG: v_add_i32
+; CI-DAG: v_or_b32
define amdgpu_kernel void @v_abs_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %src) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %src, i32 %tid
Index: llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/widen-smrd-loads.ll
@@ -213,12 +213,11 @@
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s1, s0, 0xff00
-; SI-NEXT: s_and_b32 s0, s0, 0xffff
; SI-NEXT: s_add_i32 s0, s0, 12
; SI-NEXT: s_or_b32 s0, s0, 4
-; SI-NEXT: s_addk_i32 s1, 0x2c00
; SI-NEXT: s_and_b32 s0, s0, 0xff
; SI-NEXT: s_or_b32 s0, s0, s1
+; SI-NEXT: s_addk_i32 s0, 0x2c00
; SI-NEXT: s_or_b32 s0, s0, 0x300
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
Index: llvm/trunk/test/CodeGen/Hexagon/subi-asl.ll
===================================================================
--- llvm/trunk/test/CodeGen/Hexagon/subi-asl.ll
+++ llvm/trunk/test/CodeGen/Hexagon/subi-asl.ll
@@ -3,7 +3,10 @@
; Check if S4_subi_asl_ri is being generated correctly.

; CHECK-LABEL: yes_sub_asl
-; CHECK: [[REG1:(r[0-9]+)]] = sub(#0,asl([[REG1]],#1))
+; FIXME: We no longer get subi_asl here.
+; XCHECK: [[REG1:(r[0-9]+)]] = sub(#0,asl([[REG1]],#1))
+; CHECK: [[REG1:(r[0-9]+)]] = asl([[REG1]],#1)
+; CHECK: = sub(#0,[[REG1]])

; CHECK-LABEL: no_sub_asl
; CHECK: [[REG2:(r[0-9]+)]] = asl(r{{[0-9]+}},#1)
Index: llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
+++ llvm/trunk/test/CodeGen/X86/scheduler-backtracking.ll
@@ -17,131 +17,135 @@
; ILP-NEXT: movq %rdi, %rax
; ILP-NEXT: xorl %r8d, %r8d
; ILP-NEXT: addl %esi, %esi
-; ILP-NEXT: addb $2, %sil
-; ILP-NEXT: orb $1, %sil
-; ILP-NEXT: movl $1, %r10d
-; ILP-NEXT: xorl %r14d, %r14d
+; ILP-NEXT: leal 3(%rsi), %r9d
+; ILP-NEXT: movb $125, %r10b
+; ILP-NEXT: movl $1, %edi
+; ILP-NEXT: xorl %r11d, %r11d
+; ILP-NEXT: movl %r9d, %ecx
+; ILP-NEXT: shldq %cl, %rdi, %r11
+; ILP-NEXT: subb %sil, %r10b
+; ILP-NEXT: addb $-125, %sil
+; ILP-NEXT: xorl %ebx, %ebx
; ILP-NEXT: movl %esi, %ecx
-; ILP-NEXT: shldq %cl, %r10, %r14
+; ILP-NEXT: shldq %cl, %rdi, %rbx
; ILP-NEXT: movl $1, %edx
; ILP-NEXT: shlq %cl, %rdx
-; ILP-NEXT: leal -128(%rsi), %r9d
-; ILP-NEXT: movb $-128, %r11b
-; ILP-NEXT: xorl %ebx, %ebx
+; ILP-NEXT: movl $1, %r14d
+; ILP-NEXT: movl %r10d, %ecx
+; ILP-NEXT: shrdq %cl, %r8, %r14
; ILP-NEXT: movl %r9d, %ecx
-; ILP-NEXT: shldq %cl, %r10, %rbx
-; ILP-NEXT: testb $64, %sil
-; ILP-NEXT: cmovneq %rdx, %r14
-; ILP-NEXT: cmovneq %r8, %rdx
-; ILP-NEXT: movl $1, %edi
; ILP-NEXT: shlq %cl, %rdi
-; ILP-NEXT: subb %sil, %r11b
-; ILP-NEXT: movl %r11d, %ecx
-; ILP-NEXT: shrdq %cl, %r8, %r10
-; ILP-NEXT: testb $64, %r11b
-; ILP-NEXT: cmovneq %r8, %r10
; ILP-NEXT: testb $64, %r9b
-; ILP-NEXT: cmovneq %rdi, %rbx
+; ILP-NEXT: cmovneq %rdi, %r11
; ILP-NEXT: cmovneq %r8, %rdi
-; ILP-NEXT: testb %sil, %sil
-; ILP-NEXT: cmovsq %r8, %r14
-; ILP-NEXT: cmovsq %r8, %rdx
-; ILP-NEXT: movq %r14, 8(%rax)
-; ILP-NEXT: movq %rdx, (%rax)
+; ILP-NEXT: testb $64, %r10b
+; ILP-NEXT: cmovneq %r8, %r14
+; ILP-NEXT: testb $64, %sil
+; ILP-NEXT: cmovneq %rdx, %rbx
+; ILP-NEXT: cmovneq %r8, %rdx
+; ILP-NEXT: testb %r9b, %r9b
+; ILP-NEXT: cmovsq %r8, %r11
+; ILP-NEXT: cmovsq %r8, %rdi
+; ILP-NEXT: movq %r11, 8(%rax)
+; ILP-NEXT: movq %rdi, (%rax)
; ILP-NEXT: cmovnsq %r8, %rbx
; ILP-NEXT: cmoveq %r8, %rbx
; ILP-NEXT: movq %rbx, 24(%rax)
-; ILP-NEXT: cmovnsq %r10, %rdi
-; ILP-NEXT: cmoveq %r8, %rdi
-; ILP-NEXT: movq %rdi, 16(%rax)
+; ILP-NEXT: cmovnsq %r14, %rdx
+; ILP-NEXT: cmoveq %r8, %rdx
+; ILP-NEXT: movq %rdx, 16(%rax)
; ILP-NEXT: popq %rbx
; ILP-NEXT: popq %r14
; ILP-NEXT: retq
;
; HYBRID-LABEL: test1:
; HYBRID: # %bb.0:
+; HYBRID-NEXT: pushq %rbx
; HYBRID-NEXT: movq %rdi, %rax
; HYBRID-NEXT: addl %esi, %esi
-; HYBRID-NEXT: addb $2, %sil
-; HYBRID-NEXT: orb $1, %sil
-; HYBRID-NEXT: movb $-128, %cl
+; HYBRID-NEXT: movb $125, %cl
; HYBRID-NEXT: subb %sil, %cl
; HYBRID-NEXT: xorl %r8d, %r8d
-; HYBRID-NEXT: movl $1, %r11d
+; HYBRID-NEXT: movl $1, %edi
; HYBRID-NEXT: movl $1, %r9d
; HYBRID-NEXT: shrdq %cl, %r8, %r9
; HYBRID-NEXT: testb $64, %cl
; HYBRID-NEXT: cmovneq %r8, %r9
-; HYBRID-NEXT: xorl %r10d, %r10d
-; HYBRID-NEXT: movl %esi, %ecx
-; HYBRID-NEXT: shldq %cl, %r11, %r10
-; HYBRID-NEXT: leal -128(%rsi), %ecx
-; HYBRID-NEXT: xorl %edi, %edi
-; HYBRID-NEXT: shldq %cl, %r11, %rdi
-; HYBRID-NEXT: movl $1, %edx
-; HYBRID-NEXT: shlq %cl, %rdx
-; HYBRID-NEXT: testb $64, %cl
-; HYBRID-NEXT: cmovneq %rdx, %rdi
-; HYBRID-NEXT: cmovneq %r8, %rdx
+; HYBRID-NEXT: leal 3(%rsi), %r10d
+; HYBRID-NEXT: xorl %r11d, %r11d
+; HYBRID-NEXT: movl %r10d, %ecx
+; HYBRID-NEXT: shldq %cl, %rdi, %r11
+; HYBRID-NEXT: addb $-125, %sil
+; HYBRID-NEXT: xorl %edx, %edx
; HYBRID-NEXT: movl %esi, %ecx
-; HYBRID-NEXT: shlq %cl, %r11
+; HYBRID-NEXT: shldq %cl, %rdi, %rdx
+; HYBRID-NEXT: movl $1, %ebx
+; HYBRID-NEXT: shlq %cl, %rbx
; HYBRID-NEXT: testb $64, %sil
-; HYBRID-NEXT: cmovneq %r11, %r10
-; HYBRID-NEXT: cmovneq %r8, %r11
-; HYBRID-NEXT: testb %sil, %sil
-; HYBRID-NEXT: cmovsq %r8, %r10
-; HYBRID-NEXT: movq %r10, 8(%rax)
+; HYBRID-NEXT: cmovneq %rbx, %rdx
+; HYBRID-NEXT: cmovneq %r8, %rbx
+; HYBRID-NEXT: movl %r10d, %ecx
+; HYBRID-NEXT: shlq %cl, %rdi
+; HYBRID-NEXT: testb $64, %r10b
+; HYBRID-NEXT: cmovneq %rdi, %r11
+; HYBRID-NEXT: cmovneq %r8, %rdi
+; HYBRID-NEXT: testb %r10b, %r10b
; HYBRID-NEXT: cmovsq %r8, %r11
-; HYBRID-NEXT: movq %r11, (%rax)
-; HYBRID-NEXT: cmovnsq %r8, %rdi
-; HYBRID-NEXT: cmoveq %r8, %rdi
-; HYBRID-NEXT: movq %rdi, 24(%rax)
-; HYBRID-NEXT: cmovnsq %r9, %rdx
+; HYBRID-NEXT: movq %r11, 8(%rax)
+; HYBRID-NEXT: cmovsq %r8, %rdi
+; HYBRID-NEXT: movq %rdi, (%rax)
+; HYBRID-NEXT: cmovnsq %r8, %rdx
; HYBRID-NEXT: cmoveq %r8, %rdx
-; HYBRID-NEXT: movq %rdx, 16(%rax)
+; HYBRID-NEXT: movq %rdx, 24(%rax)
+; HYBRID-NEXT: cmovnsq %r9, %rbx
+; HYBRID-NEXT: cmoveq %r8, %rbx
+; HYBRID-NEXT: movq %rbx, 16(%rax)
+; HYBRID-NEXT: popq %rbx
; HYBRID-NEXT: retq
;
; BURR-LABEL: test1:
; BURR: # %bb.0:
+; BURR-NEXT: pushq %rbx
; BURR-NEXT: movq %rdi, %rax
; BURR-NEXT: addl %esi, %esi
-; BURR-NEXT: addb $2, %sil
-; BURR-NEXT: orb $1, %sil
-; BURR-NEXT: movb $-128, %cl
+; BURR-NEXT: movb $125, %cl
; BURR-NEXT: subb %sil, %cl
; BURR-NEXT: xorl %r8d, %r8d
-; BURR-NEXT: movl $1, %r11d
+; BURR-NEXT: movl $1, %edi
; BURR-NEXT: movl $1, %r9d
; BURR-NEXT: shrdq %cl, %r8, %r9
; BURR-NEXT: testb $64, %cl
; BURR-NEXT: cmovneq %r8, %r9
-; BURR-NEXT: xorl %r10d, %r10d
+; BURR-NEXT: leal 3(%rsi), %r10d
+; BURR-NEXT: xorl %r11d, %r11d
+; BURR-NEXT: movl %r10d, %ecx
+; BURR-NEXT: shldq %cl, %rdi, %r11
+; BURR-NEXT: addb $-125, %sil
+; BURR-NEXT: xorl %edx, %edx
; BURR-NEXT: movl %esi, %ecx
-; BURR-NEXT: shldq %cl, %r11, %r10
-; BURR-NEXT: leal -128(%rsi), %ecx
-; BURR-NEXT: xorl %edi, %edi
-; BURR-NEXT: shldq %cl, %r11, %rdi
-; BURR-NEXT: movl $1, %edx
-; BURR-NEXT: shlq %cl, %rdx
-; BURR-NEXT: testb $64, %cl
-; BURR-NEXT: cmovneq %rdx, %rdi
-; BURR-NEXT: cmovneq %r8, %rdx
-; BURR-NEXT: movl %esi, %ecx
-; BURR-NEXT: shlq %cl, %r11
+; BURR-NEXT: shldq %cl, %rdi, %rdx
+; BURR-NEXT: movl $1, %ebx
+; BURR-NEXT: shlq %cl, %rbx
; BURR-NEXT: testb $64, %sil
-; BURR-NEXT: cmovneq %r11, %r10
-; BURR-NEXT: cmovneq %r8, %r11
-; BURR-NEXT: testb %sil, %sil
-; BURR-NEXT: cmovsq %r8, %r10
-; BURR-NEXT: movq %r10, 8(%rax)
+; BURR-NEXT: cmovneq %rbx, %rdx
+; BURR-NEXT: cmovneq %r8, %rbx
+; BURR-NEXT: movl %r10d, %ecx
+; BURR-NEXT: shlq %cl, %rdi
+; BURR-NEXT: testb $64, %r10b
+; BURR-NEXT: cmovneq %rdi, %r11
+; BURR-NEXT: cmovneq %r8, %rdi
+; BURR-NEXT: testb %r10b, %r10b
; BURR-NEXT: cmovsq %r8, %r11
-; BURR-NEXT: movq %r11, (%rax)
-; BURR-NEXT: cmovnsq %r8, %rdi
-; BURR-NEXT: cmoveq %r8, %rdi
-; BURR-NEXT: movq %rdi, 24(%rax)
-; BURR-NEXT: cmovnsq %r9, %rdx
+; BURR-NEXT: movq %r11, 8(%rax)
+; BURR-NEXT: cmovsq %r8, %rdi
+; BURR-NEXT: movq %rdi, (%rax)
+; BURR-NEXT: cmovnsq %r8, %rdx
; BURR-NEXT: cmoveq %r8, %rdx
-; BURR-NEXT: movq %rdx, 16(%rax)
+; BURR-NEXT: movq %rdx, 24(%rax)
+; BURR-NEXT: cmovnsq %r9, %rbx
+; BURR-NEXT: cmoveq %r8, %rbx
+; BURR-NEXT: movq %rbx, 16(%rax)
+; BURR-NEXT: popq %rbx
; BURR-NEXT: retq
;
; SRC-LABEL: test1:
@@ -149,9 +153,8 @@
; SRC-NEXT: pushq %rbx
; SRC-NEXT: movq %rdi, %rax
; SRC-NEXT: addl %esi, %esi
-; SRC-NEXT: addb $2, %sil
-; SRC-NEXT: orb $1, %sil
-; SRC-NEXT: movb $-128, %cl
+; SRC-NEXT: leal 3(%rsi), %r9d
+; SRC-NEXT: movb $125, %cl
; SRC-NEXT: subb %sil, %cl
; SRC-NEXT: xorl %r8d, %r8d
; SRC-NEXT: movl $1, %edi
@@ -159,24 +162,24 @@
; SRC-NEXT: shrdq %cl, %r8, %r10
; SRC-NEXT: testb $64, %cl
; SRC-NEXT: cmovneq %r8, %r10
-; SRC-NEXT: leal -128(%rsi), %r9d
+; SRC-NEXT: addb $-125, %sil
; SRC-NEXT: xorl %edx, %edx
-; SRC-NEXT: movl %r9d, %ecx
+; SRC-NEXT: movl %esi, %ecx
; SRC-NEXT: shldq %cl, %rdi, %rdx
; SRC-NEXT: xorl %r11d, %r11d
-; SRC-NEXT: movl %esi, %ecx
+; SRC-NEXT: movl %r9d, %ecx
; SRC-NEXT: shldq %cl, %rdi, %r11
; SRC-NEXT: movl $1, %ebx
; SRC-NEXT: shlq %cl, %rbx
-; SRC-NEXT: testb $64, %sil
+; SRC-NEXT: testb $64, %r9b
; SRC-NEXT: cmovneq %rbx, %r11
; SRC-NEXT: cmovneq %r8, %rbx
-; SRC-NEXT: movl %r9d, %ecx
+; SRC-NEXT: movl %esi, %ecx
; SRC-NEXT: shlq %cl, %rdi
-; SRC-NEXT: testb $64, %r9b
+; SRC-NEXT: testb $64, %sil
; SRC-NEXT: cmovneq %rdi, %rdx
; SRC-NEXT: cmovneq %r8, %rdi
-; SRC-NEXT: testb %sil, %sil
+; SRC-NEXT: testb %r9b, %r9b
; SRC-NEXT: cmovnsq %r10, %rdi
; SRC-NEXT: cmoveq %r8, %rdi
; SRC-NEXT: cmovnsq %r8, %rdx
@@ -196,31 +199,29 @@
; LIN-NEXT: xorl %r9d, %r9d
; LIN-NEXT: movl $1, %r8d
; LIN-NEXT: addl %esi, %esi
-; LIN-NEXT: addb $2, %sil
-; LIN-NEXT: orb $1, %sil
-; LIN-NEXT: movl $1, %edx
-; LIN-NEXT: movl %esi, %ecx
-; LIN-NEXT: shlq %cl, %rdx
-; LIN-NEXT: testb $64, %sil
-; LIN-NEXT: movq %rdx, %rcx
-; LIN-NEXT: cmovneq %r9, %rcx
-; LIN-NEXT: testb %sil, %sil
-; LIN-NEXT: cmovsq %r9, %rcx
-; LIN-NEXT: movq %rcx, (%rdi)
-; LIN-NEXT: xorl %edi, %edi
-; LIN-NEXT: movl %esi, %ecx
-; LIN-NEXT: shldq %cl, %r8, %rdi
-; LIN-NEXT: cmovneq %rdx, %rdi
-; LIN-NEXT: cmovsq %r9, %rdi
-; LIN-NEXT: movq %rdi, 8(%rax)
-; LIN-NEXT: leal -128(%rsi), %r10d
+; LIN-NEXT: leal 3(%rsi), %ecx
+; LIN-NEXT: movl $1, %edi
+; LIN-NEXT: shlq %cl, %rdi
+; LIN-NEXT: testb $64, %cl
+; LIN-NEXT: movq %rdi, %rdx
+; LIN-NEXT: cmovneq %r9, %rdx
+; LIN-NEXT: testb %cl, %cl
+; LIN-NEXT: cmovsq %r9, %rdx
+; LIN-NEXT: movq %rdx, (%rax)
+; LIN-NEXT: xorl %edx, %edx
+; LIN-NEXT: # kill: def $cl killed $cl killed $ecx
+; LIN-NEXT: shldq %cl, %r8, %rdx
+; LIN-NEXT: cmovneq %rdi, %rdx
+; LIN-NEXT: cmovsq %r9, %rdx
+; LIN-NEXT: movq %rdx, 8(%rax)
+; LIN-NEXT: leal -125(%rsi), %r10d
; LIN-NEXT: movl $1, %edx
; LIN-NEXT: movl %r10d, %ecx
; LIN-NEXT: shlq %cl, %rdx
; LIN-NEXT: testb $64, %r10b
; LIN-NEXT: movq %rdx, %rdi
; LIN-NEXT: cmovneq %r9, %rdi
-; LIN-NEXT: movb $-128, %cl
+; LIN-NEXT: movb $125, %cl
; LIN-NEXT: subb %sil, %cl
; LIN-NEXT: movl $1, %esi
; LIN-NEXT: shrdq %cl, %r9, %rsi
Index: llvm/trunk/test/CodeGen/X86/signbit-shift.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/signbit-shift.ll
+++ llvm/trunk/test/CodeGen/X86/signbit-shift.ll
@@ -33,8 +33,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
-; CHECK-NEXT: psrld $31, %xmm0
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [42,42,42,42]
+; CHECK-NEXT: psubd %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
  %c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %e = zext <4 x i1> %c to <4 x i32>
Index: llvm/trunk/test/CodeGen/X86/split-store.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/split-store.ll
+++ llvm/trunk/test/CodeGen/X86/split-store.ll
@@ -217,10 +217,9 @@
; CHECK-LABEL: int1_int1_pair:
; CHECK: # %bb.0:
; CHECK-NEXT: addb %sil, %sil
-; CHECK-NEXT: andb $1, %dil
-; CHECK-NEXT: orb %sil, %dil
-; CHECK-NEXT: andb $3, %dil
-; CHECK-NEXT: movb %dil, (%rdx)
+; CHECK-NEXT: subb %dil, %sil
+; CHECK-NEXT: andb $3, %sil
+; CHECK-NEXT: movb %sil, (%rdx)
; CHECK-NEXT: retq
  %t1 = zext i1 %tmp2 to i2
  %t2 = shl nuw i2 %t1, 1
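
The DAGCombiner change in this patch goes both ways on the same fact: visitADD still rewrites (a+b) into (a|b) when the operands share no bits, and visitOR now reuses the ADD combines (visitADDLike) when DAG.haveNoCommonBitsSet(N0, N1) holds. A minimal standalone C++ sketch of that identity, not part of the patch and only illustrating why the guard is sufficient:

// When a & b == 0, each bit position receives at most one set bit, so the
// addition produces no carries and a + b equals a | b.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t a = 0x00ff00f0;
  uint32_t b = 0xf000ff00; // chosen so that a & b == 0
  assert((a & b) == 0);
  assert((a + b) == (a | b)); // ADD and OR compute the same value here
  return 0;
}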