Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -811,6 +811,15 @@
       return &I;
     }
   }
+
+  // Transform (x << y) >> y to x & (-1 >> y)
+  Value *X;
+  if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) {
+    Constant *AllOnes = ConstantInt::getAllOnesValue(Ty);
+    Value *Mask = Builder.CreateLShr(AllOnes, Op1);
+    return BinaryOperator::CreateAnd(Mask, X);
+  }
+
   return nullptr;
 }
Index: llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -895,8 +895,8 @@
 ; CHECK-LABEL: @ubfe_offset_0(
 ; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = shl i32 %src, %1
-; CHECK-NEXT: %bfe = lshr i32 %2, %1
+; CHECK-NEXT: %2 = lshr i32 -1, %1
+; CHECK-NEXT: %bfe = and i32 %2, %src
 ; CHECK-NEXT: ret i32 %bfe
 define i32 @ubfe_offset_0(i32 %src, i32 %width) {
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
@@ -905,8 +905,8 @@
 ; CHECK-LABEL: @ubfe_offset_32(
 ; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = shl i32 %src, %1
-; CHECK-NEXT: %bfe = lshr i32 %2, %1
+; CHECK-NEXT: %2 = lshr i32 -1, %1
+; CHECK-NEXT: %bfe = and i32 %2, %src
 ; CHECK-NEXT: ret i32 %bfe
 define i32 @ubfe_offset_32(i32 %src, i32 %width) {
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
@@ -915,8 +915,8 @@
 ; CHECK-LABEL: @ubfe_offset_31(
 ; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = shl i32 %src, %1
-; CHECK-NEXT: %bfe = lshr i32 %2, %1
+; CHECK-NEXT: %2 = lshr i32 -1, %1
+; CHECK-NEXT: %bfe = and i32 %2, %src
 ; CHECK-NEXT: ret i32 %bfe
 define i32 @ubfe_offset_31(i32 %src, i32 %width) {
   %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
@@ -1002,8 +1002,8 @@
 ; CHECK-LABEL: @ubfe_offset_0_i64(
 ; CHECK-NEXT: %1 = sub i32 64, %width
 ; CHECK-NEXT: %2 = zext i32 %1 to i64
-; CHECK-NEXT: %3 = shl i64 %src, %2
-; CHECK-NEXT: %bfe = lshr i64 %3, %2
+; CHECK-NEXT: %3 = lshr i64 -1, %2
+; CHECK-NEXT: %bfe = and i64 %3, %src
 ; CHECK-NEXT: ret i64 %bfe
 define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
   %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
Index: llvm/trunk/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
+++ llvm/trunk/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
@@ -14,8 +14,8 @@
 define i32 @positive_samevar(i32 %x, i32 %y) {
 ; CHECK-LABEL: @positive_samevar(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[TMP0]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 -1, [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %tmp0 = shl i32 %x, %y
@@ -124,8 +124,8 @@
 define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @positive_samevar_vec(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[TMP0]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> <i32 -1, i32 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %tmp0 = shl <2 x i32> %x, %y
@@ -171,8 +171,7 @@
 define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef2(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]],
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]],
+; CHECK-NEXT:    [[RET:%.*]] = and <3 x i32> [[X:%.*]],
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
   %tmp0 = shl <3 x i32> %x,
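
Editorial note (illustrative, not part of the patch): the new fold in visitLShr rewrites a variable-amount shl/lshr round-trip into an explicit mask. Below is a minimal before/after sketch on scalar IR, assuming it is run through opt -instcombine as the tests above do; the function name @sketch and value names are hypothetical:

    define i32 @sketch(i32 %x, i32 %y) {
      ; %shifted has a single use, satisfying the m_OneUse() guard
      %shifted = shl i32 %x, %y       ; discard the high %y bits of %x
      %masked = lshr i32 %shifted, %y ; shift back down, zero-filling
      ret i32 %masked
    }

    ; After InstCombine, the shl/lshr pair is replaced by a mask-and-and:
    ;   %mask = lshr i32 -1, %y       ; -1 >> y: the low (32 - y) bits set
    ;   %masked = and i32 %mask, %x
    ;   ret i32 %masked

Both forms compute x with its high y bits cleared, so the rewrite keeps the instruction count at two while, per the test filename, canonicalizing the pattern to masking; the one-use restriction avoids growing the IR when the shl has other users.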