Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -39,10 +39,19 @@
     if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
       return Res;
 
+  // (C1 shift (A add C2)) -> ((C1 shift C2) shift A)
+  // iff A and C2 are both known non-negative.
+  Value *A;
+  Constant *C;
+  if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C))))
+    if (isKnownNonNegative(A, DL) && isKnownNonNegative(C, DL))
+      return BinaryOperator::Create(
+          I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A);
+
   // X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
   // Because shifts by negative values (which could occur if A were negative)
   // are undefined.
-  Value *A; const APInt *B;
+  const APInt *B;
   if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
     // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
     // demand the sign bit (and many others) here??
Index: llvm/trunk/test/Transforms/InstCombine/shift-add.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/shift-add.ll
+++ llvm/trunk/test/Transforms/InstCombine/shift-add.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; This test makes sure that these instructions are properly eliminated.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i32 @shl_C1_add_A_C2_i32(i16 %A) {
+; CHECK-LABEL: @shl_C1_add_A_C2_i32(
+; CHECK-NEXT:    [[B:%.*]] = zext i16 %A to i32
+; CHECK-NEXT:    [[D:%.*]] = shl i32 192, [[B]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %B = zext i16 %A to i32
+  %C = add i32 %B, 5
+  %D = shl i32 6, %C
+  ret i32 %D
+}
+
+define i32 @ashr_C1_add_A_C2_i32(i32 %A) {
+; CHECK-LABEL: @ashr_C1_add_A_C2_i32(
+; CHECK-NEXT:    ret i32 0
+;
+  %B = and i32 %A, 65535
+  %C = add i32 %B, 5
+  %D = ashr i32 6, %C
+  ret i32 %D
+}
+
+define i32 @lshr_C1_add_A_C2_i32(i32 %A) {
+; CHECK-LABEL: @lshr_C1_add_A_C2_i32(
+; CHECK-NEXT:    [[B:%.*]] = and i32 %A, 65535
+; CHECK-NEXT:    [[D:%.*]] = lshr i32 6, [[B]]
+; CHECK-NEXT:    ret i32 [[D]]
+;
+  %B = and i32 %A, 65535
+  %C = add i32 %B, 5
+  %D = lshr i32 192, %C
+  ret i32 %D
+}
+
+define <4 x i32> @shl_C1_add_A_C2_v4i32(<4 x i16> %A) {
+; CHECK-LABEL: @shl_C1_add_A_C2_v4i32(
+; CHECK-NEXT:    [[B:%.*]] = zext <4 x i16> %A to <4 x i32>
+; CHECK-NEXT:    [[D:%.*]] = shl <4 x i32> , [[B]]
+; CHECK-NEXT:    ret <4 x i32> [[D]]
+;
+  %B = zext <4 x i16> %A to <4 x i32>
+  %C = add <4 x i32> %B,
+  %D = shl <4 x i32> , %C
+  ret <4 x i32> %D
+}
+
+define <4 x i32> @ashr_C1_add_A_C2_v4i32(<4 x i32> %A) {
+; CHECK-LABEL: @ashr_C1_add_A_C2_v4i32(
+; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> %A,
+; CHECK-NEXT:    [[D:%.*]] = ashr <4 x i32> , [[B]]
+; CHECK-NEXT:    ret <4 x i32> [[D]]
+;
+  %B = and <4 x i32> %A,
+  %C = add <4 x i32> %B,
+  %D = ashr <4 x i32> , %C
+  ret <4 x i32> %D
+}
+
+define <4 x i32> @lshr_C1_add_A_C2_v4i32(<4 x i32> %A) {
+; CHECK-LABEL: @lshr_C1_add_A_C2_v4i32(
+; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> %A,
+; CHECK-NEXT:    [[D:%.*]] = lshr <4 x i32> , [[B]]
+; CHECK-NEXT:    ret <4 x i32> [[D]]
+;
+  %B = and <4 x i32> %A,
+  %C = add <4 x i32> %B,
+  %D = lshr <4 x i32> , %C
+  ret <4 x i32> %D
+}