Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7616,6 +7616,32 @@
     }
   }
 
+  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+  // sra (add (shl X, N1C), AddC), N1C -->
+  // sext (add (trunc X to (width - N1C)), AddC')
+  if (!LegalOperations && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+      SDValue Shl = N0.getOperand(0);
+      // Determine what the truncate's type would be and ask the target if that
+      // is a free operation.
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned ShiftAmt = N1C->getZExtValue();
+      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+      if (VT.isVector())
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+      if (TLI.isTruncateFree(VT, TruncVT)) {
+        SDLoc DL(N);
+        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+        return DAG.getSExtOrTrunc(Add, DL, VT);
+      }
+    }
+  }
+
   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
       N1.getOperand(0).getOpcode() == ISD::AND) {
Index: llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
+++ llvm/trunk/test/CodeGen/AArch64/shift-mod.ll
@@ -78,9 +78,8 @@
 define i64 @ashr_add_shl_i32(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #4294967296
-; CHECK-NEXT:    add x8, x8, x0, lsl #32
-; CHECK-NEXT:    asr x0, x8, #32
+; CHECK-NEXT:    add w8, w0, #1 // =1
+; CHECK-NEXT:    sxtw x0, w8
 ; CHECK-NEXT:    ret
   %conv = shl i64 %r, 32
   %sext = add i64 %conv, 4294967296
@@ -91,9 +90,8 @@
 define i64 @ashr_add_shl_i8(i64 %r) {
 ; CHECK-LABEL: ashr_add_shl_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #72057594037927936
-; CHECK-NEXT:    add x8, x8, x0, lsl #56
-; CHECK-NEXT:    asr x0, x8, #56
+; CHECK-NEXT:    add w8, w0, #1 // =1
+; CHECK-NEXT:    sxtb x0, w8
 ; CHECK-NEXT:    ret
   %conv = shl i64 %r, 56
   %sext = add i64 %conv, 72057594037927936
Index: llvm/trunk/test/CodeGen/X86/shift-combine.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/shift-combine.ll
+++ llvm/trunk/test/CodeGen/X86/shift-combine.ll
@@ -168,10 +168,8 @@
 ;
 ; X64-LABEL: ashr_add_shl_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $32, %rdi
-; X64-NEXT:    movabsq $4294967296, %rax # imm = 0x100000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    sarq $32, %rax
+; X64-NEXT:    incl %edi
+; X64-NEXT:    movslq %edi, %rax
 ; X64-NEXT:    retq
   %conv = shl i64 %r, 32
   %sext = add i64 %conv, 4294967296
@@ -182,20 +180,17 @@
 define i64 @ashr_add_shl_i8(i64 %r) nounwind {
 ; X32-LABEL: ashr_add_shl_i8:
 ; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    shll $24, %edx
-; X32-NEXT:    addl $33554432, %edx # imm = 0x2000000
-; X32-NEXT:    movl %edx, %eax
-; X32-NEXT:    sarl $24, %eax
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %al
+; X32-NEXT:    addb $2, %al
+; X32-NEXT:    movsbl %al, %eax
+; X32-NEXT:    movl %eax, %edx
 ; X32-NEXT:    sarl $31, %edx
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: ashr_add_shl_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    shlq $56, %rdi
-; X64-NEXT:    movabsq $144115188075855872, %rax # imm = 0x200000000000000
-; X64-NEXT:    addq %rdi, %rax
-; X64-NEXT:    sarq $56, %rax
+; X64-NEXT:    addb $2, %dil
+; X64-NEXT:    movsbq %dil, %rax
 ; X64-NEXT:    retq
   %conv = shl i64 %r, 56
   %sext = add i64 %conv, 144115188075855872
@@ -209,34 +204,31 @@
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:    shll $24, %edi
-; X32-NEXT:    shll $24, %esi
-; X32-NEXT:    shll $24, %edx
-; X32-NEXT:    shll $24, %ecx
-; X32-NEXT:    addl $16777216, %ecx # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %edx # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %esi # imm = 0x1000000
-; X32-NEXT:    addl $16777216, %edi # imm = 0x1000000
-; X32-NEXT:    sarl $24, %edi
-; X32-NEXT:    sarl $24, %esi
-; X32-NEXT:    sarl $24, %edx
-; X32-NEXT:    sarl $24, %ecx
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X32-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X32-NEXT:    incb %dh
+; X32-NEXT:    movsbl %dh, %esi
+; X32-NEXT:    incb %ch
+; X32-NEXT:    movsbl %ch, %edi
+; X32-NEXT:    incb %dl
+; X32-NEXT:    movsbl %dl, %edx
+; X32-NEXT:    incb %cl
+; X32-NEXT:    movsbl %cl, %ecx
 ; X32-NEXT:    movl %ecx, 12(%eax)
 ; X32-NEXT:    movl %edx, 8(%eax)
-; X32-NEXT:    movl %esi, 4(%eax)
-; X32-NEXT:    movl %edi, (%eax)
+; X32-NEXT:    movl %edi, 4(%eax)
+; X32-NEXT:    movl %esi, (%eax)
 ; X32-NEXT:    popl %esi
 ; X32-NEXT:    popl %edi
 ; X32-NEXT:    retl $4
 ;
 ; X64-LABEL: ashr_add_shl_v4i8:
 ; X64:       # %bb.0:
+; X64-NEXT:    pcmpeqd %xmm1, %xmm1
+; X64-NEXT:    psubd %xmm1, %xmm0
 ; X64-NEXT:    pslld $24, %xmm0
-; X64-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    psrad $24, %xmm0
 ; X64-NEXT:    retq
   %conv = shl <4 x i32> %r,
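
A worked example of the new fold at the IR level (a sketch, not part of the patch: it mirrors the ashr_add_shl_i32 test above; the trailing ashr/ret lines and the @ashr_add_shl_i32_folded name are assumed for illustration). Here X is %r (i64), N1C is 32, and AddC is 4294967296, i.e. 1 shifted left by 32, so TruncVT is i32 and AddC' = AddC >> 32 = 1:

  ; Before: the add happens in the high 32 bits, then is shifted back down.
  define i64 @ashr_add_shl_i32(i64 %r) {
    %conv = shl i64 %r, 32
    %sext = add i64 %conv, 4294967296
    %shr = ashr i64 %sext, 32
    ret i64 %shr
  }

  ; After (equivalent): add in the narrow type, then sign-extend.
  ; This is what lets AArch64 emit "add w8, w0, #1" + "sxtw x0, w8" above.
  define i64 @ashr_add_shl_i32_folded(i64 %r) {
    %trunc = trunc i64 %r to i32
    %add = add i32 %trunc, 1
    %ext = sext i32 %add to i64
    ret i64 %ext
  }

The combine only fires when TLI.isTruncateFree(VT, TruncVT) reports the cast as free, so targets where the narrow form would be more expensive keep the original shift sequence.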