Index: lib/Analysis/InstructionSimplify.cpp
===================================================================
--- lib/Analysis/InstructionSimplify.cpp
+++ lib/Analysis/InstructionSimplify.cpp
@@ -5600,9 +5600,6 @@
     break;
   case Instruction::Call: {
     Result = SimplifyCall(cast<CallInst>(I), Q);
-    // Don't perform known bits simplification below for musttail calls.
-    if (cast<CallInst>(I)->isMustTailCall())
-      return Result;
     break;
   }
   case Instruction::Freeze:
@@ -5620,14 +5617,6 @@
     break;
  }

-  // In general, it is possible for computeKnownBits to determine all bits in a
-  // value even when the operands are not all constants.
-  if (!Result && I->getType()->isIntOrIntVectorTy()) {
-    KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE);
-    if (Known.isConstant())
-      Result = ConstantInt::get(I->getType(), Known.getConstant());
-  }
-
  /// If called on unreachable code, the above logic may report that the
  /// instruction simplified to itself. Make life easier for users by
  /// detecting that case here, returning a safe value instead.
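
Aside, for context: the block deleted above folded any integer-typed instruction to a constant whenever computeKnownBits could pin down every bit, even when no operand is a constant. A minimal standalone sketch of that reasoning on the @shl_shl pattern from knownzero-shift.ll below (plain C++, no LLVM dependency; knownZeroAfterShl is an illustrative helper, not an LLVM API):

#include <cassert>
#include <cstdint>

// After "V << Amt" (Amt a constant), the low Amt bits of the result are zero,
// and bits already known zero in V stay known zero in their shifted position.
static uint32_t knownZeroAfterShl(uint32_t KnownZero, unsigned Amt) {
  return (KnownZero << Amt) | ((1u << Amt) - 1u);
}

int main() {
  // %B = shl i32 %A, 6 ; %C = shl i32 %B, 28
  uint32_t KnownZero = 0;                       // nothing known about %A
  KnownZero = knownZeroAfterShl(KnownZero, 6);  // low 6 bits of %B are zero
  KnownZero = knownZeroAfterShl(KnownZero, 28); // all 32 bits of %C are zero
  assert(KnownZero == 0xFFFFFFFFu);             // hence the old fold to 0
}
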
Index: lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1005,6 +1005,17 @@
   return false;
 }

+static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
+  // Try hard to determine if the sign is known,
+  // to generate a more efficient expansion.
+  KnownBits Known = computeKnownBits(V, *DL);
+  if (Known.isNegative())
+    return Constant::getAllOnesValue(V->getType());
+  if (Known.isNonNegative())
+    return Constant::getNullValue(V->getType());
+  return Builder.CreateAShr(V, Builder.getInt32(31));
+}
+
 Value* AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
                                             BinaryOperator &I,
                                             Value *Num, Value *Den) const {
@@ -1046,9 +1057,8 @@

   Value *Sign = nullptr;
   if (IsSigned) {
-    ConstantInt *K31 = Builder.getInt32(31);
-    Value *LHSign = Builder.CreateAShr(Num, K31);
-    Value *RHSign = Builder.CreateAShr(Den, K31);
+    Value *LHSign = getSign32(Num, Builder, DL);
+    Value *RHSign = getSign32(Den, Builder, DL);
     // Remainder sign is the same as LHS
     Sign = IsDiv ? Builder.CreateXor(LHSign, RHSign) : LHSign;
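
Aside: a standalone sketch of the value getSign32 materializes (plain C++; sign32 is an illustrative name, not part of the patch). It assumes arithmetic right shift of signed integers, which mainstream compilers implement and C++20 guarantees:

#include <cassert>
#include <cstdint>

// The 32-bit division expansion needs a mask that is -1 for a negative input
// and 0 otherwise. getSign32 emits "ashr V, 31" only when known bits cannot
// already decide the sign; otherwise the mask folds to a constant.
static int32_t sign32(int32_t V) { return V >> 31; }

int main() {
  assert(sign32(-5) == -1); // known negative: Constant::getAllOnesValue
  assert(sign32(42) == 0);  // known non-negative: Constant::getNullValue
}

In expandDivRem32, the quotient's sign is then LHSign ^ RHSign and the remainder's sign is LHSign alone, as in the hunk above.
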
Index: test/Analysis/ValueTracking/knownzero-shift.ll
===================================================================
--- test/Analysis/ValueTracking/knownzero-shift.ll
+++ test/Analysis/ValueTracking/knownzero-shift.ll
@@ -15,9 +15,15 @@

 !0 = !{ i8 1, i8 5 }

+; The following cases only get folded by InstCombine,
+; see InstCombine/shift-shift.ll. If we wanted to,
+; we could explicitly handle them in InstSimplify as well.
+
 define i32 @shl_shl(i32 %A) {
 ; CHECK-LABEL: @shl_shl(
-; CHECK-NEXT:    ret i32 0
+; CHECK-NEXT:    [[B:%.*]] = shl i32 [[A:%.*]], 6
+; CHECK-NEXT:    [[C:%.*]] = shl i32 [[B]], 28
+; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = shl i32 %A, 6
   %C = shl i32 %B, 28
@@ -26,7 +32,9 @@

 define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) {
 ; CHECK-LABEL: @shl_shl_splat_vec(
-; CHECK-NEXT:    ret <2 x i33> zeroinitializer
+; CHECK-NEXT:    [[B:%.*]] = shl <2 x i33> [[A:%.*]], <i33 5, i33 5>
+; CHECK-NEXT:    [[C:%.*]] = shl <2 x i33> [[B]], <i33 28, i33 28>
+; CHECK-NEXT:    ret <2 x i33> [[C]]
 ;
   %B = shl <2 x i33> %A, <i33 5, i33 5>
   %C = shl <2 x i33> %B, <i33 28, i33 28>
@@ -37,7 +45,7 @@

 define <2 x i33> @shl_shl_vec(<2 x i33> %A) {
 ; CHECK-LABEL: @shl_shl_vec(
-; CHECK-NEXT:    [[B:%.*]] = shl <2 x i33> %A, <i33 6, i33 5>
+; CHECK-NEXT:    [[B:%.*]] = shl <2 x i33> [[A:%.*]], <i33 6, i33 5>
 ; CHECK-NEXT:    [[C:%.*]] = shl <2 x i33> [[B]], <i33 27, i33 28>
 ; CHECK-NEXT:    ret <2 x i33> [[C]]
 ;
@@ -48,7 +56,9 @@

 define i232 @lshr_lshr(i232 %A) {
 ; CHECK-LABEL: @lshr_lshr(
-; CHECK-NEXT:    ret i232 0
+; CHECK-NEXT:    [[B:%.*]] = lshr i232 [[A:%.*]], 231
+; CHECK-NEXT:    [[C:%.*]] = lshr i232 [[B]], 1
+; CHECK-NEXT:    ret i232 [[C]]
 ;
   %B = lshr i232 %A, 231
   %C = lshr i232 %B, 1
@@ -57,7 +67,9 @@

 define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @lshr_lshr_splat_vec(
-; CHECK-NEXT:    ret <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 28, i32 28>
+; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 4, i32 4>
+; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = lshr <2 x i32> %A, <i32 28, i32 28>
   %C = lshr <2 x i32> %B, <i32 4, i32 4>
@@ -66,7 +78,9 @@

 define <2 x i32> @lshr_lshr_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @lshr_lshr_vec(
-; CHECK-NEXT:    ret <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 30>
+; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 2, i32 3>
+; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = lshr <2 x i32> %A, <i32 31, i32 30>
   %C = lshr <2 x i32> %B, <i32 2, i32 3>
Index: test/Transforms/GVN/PRE/volatile.ll
===================================================================
--- test/Transforms/GVN/PRE/volatile.ll
+++ test/Transforms/GVN/PRE/volatile.ll
@@ -201,7 +201,7 @@
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i32, i32* [[V:%.*]], !range !0
-; CHECK-NEXT:    ret i32 0
+; CHECK-NEXT:    ret i32 [[LOAD]]
 ;
 entry:
   %load = load volatile i32, i32* %V, !range !0
Index: test/Transforms/InstSimplify/assume.ll
===================================================================
--- test/Transforms/InstSimplify/assume.ll
+++ /dev/null
@@ -1,93 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -instsimplify -S < %s 2>&1 -pass-remarks-analysis=.* | FileCheck %s
-
-; Verify that warnings are emitted for the 2nd and 3rd tests.
-
-; CHECK: remark: /tmp/s.c:1:13: Detected conflicting code assumptions.
-; CHECK: remark: /tmp/s.c:4:10: Detected conflicting code assumptions.
-; CHECK: remark: /tmp/s.c:5:50: Detected conflicting code assumptions.
-
-define void @test1() {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    ret void
-;
-  call void @llvm.assume(i1 1)
-  ret void
-
-}
-
-; The alloca guarantees that the low bits of %a are zero because of alignment.
-; The assume says the opposite. The assume is processed last, so that's the
-; return value. There's no way to win (we can't undo transforms that happened
-; based on half-truths), so just don't crash.
-
-define i64 @PR31809() !dbg !7 {
-; CHECK-LABEL: @PR31809(
-; CHECK-NEXT:    ret i64 3
-;
-  %a = alloca i32
-  %t1 = ptrtoint i32* %a to i64, !dbg !9
-  %cond = icmp eq i64 %t1, 3
-  call void @llvm.assume(i1 %cond)
-  ret i64 %t1
-}
-
-; Similar to above: there's no way to know which assumption is truthful,
-; so just don't crash.
-
-define i8 @conflicting_assumptions(i8 %x) !dbg !10 {
-; CHECK-LABEL: @conflicting_assumptions(
-; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[X:%.*]], 1, !dbg !10
-; CHECK-NEXT:    call void @llvm.assume(i1 false)
-; CHECK-NEXT:    [[COND2:%.*]] = icmp eq i8 [[X]], 4
-; CHECK-NEXT:    call void @llvm.assume(i1 [[COND2]])
-; CHECK-NEXT:    ret i8 [[ADD]]
-;
-  %add = add i8 %x, 1, !dbg !11
-  %cond1 = icmp eq i8 %x, 3
-  call void @llvm.assume(i1 %cond1)
-  %cond2 = icmp eq i8 %x, 4
-  call void @llvm.assume(i1 %cond2)
-  ret i8 %add
-}
-
-; Another case of conflicting assumptions. This would crash because we'd
-; try to set more known bits than existed in the known bits struct.
-
-define void @PR36270(i32 %b) !dbg !13 {
-; CHECK-LABEL: @PR36270(
-; CHECK-NEXT:    tail call void @llvm.assume(i1 false)
-; CHECK-NEXT:    unreachable
-;
-  %B7 = xor i32 -1, 2147483647
-  %and1 = and i32 %b, 3
-  %B12 = lshr i32 %B7, %and1, !dbg !14
-  %C1 = icmp ult i32 %and1, %B12
-  tail call void @llvm.assume(i1 %C1)
-  %cmp2 = icmp eq i32 0, %B12
-  tail call void @llvm.assume(i1 %cmp2)
-  unreachable
-}
-
-declare void @llvm.assume(i1) nounwind
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!3, !4, !5}
-!llvm.ident = !{!6}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0 (trunk 282540) (llvm/trunk 282542)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
-!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp")
-!2 = !{}
-!3 = !{i32 2, !"Dwarf Version", i32 4}
-!4 = !{i32 2, !"Debug Info Version", i32 3}
-!5 = !{i32 1, !"PIC Level", i32 2}
-!6 = !{!"clang version 4.0.0 (trunk 282540) (llvm/trunk 282542)"}
-!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2)
-!8 = !DISubroutineType(types: !2)
-!9 = !DILocation(line: 1, column: 13, scope: !7)
-!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, unit: !0, retainedNodes: !2)
-!11 = !DILocation(line: 4, column: 10, scope: !10)
-!12 = !DILocation(line: 4, column: 3, scope: !10)
-!13 = distinct !DISubprogram(name: "PR36270", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, unit: !0, retainedNodes: !2)
-!14 = !DILocation(line: 5, column: 50, scope: !13)
Index: test/Transforms/InstSimplify/call.ll
===================================================================
--- test/Transforms/InstSimplify/call.ll
+++ test/Transforms/InstSimplify/call.ll
@@ -988,7 +988,7 @@
 define i32 @returned_const_int_arg() {
 ; CHECK-LABEL: @returned_const_int_arg(
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @passthru_i32(i32 42)
-; CHECK-NEXT:    ret i32 42
+; CHECK-NEXT:    ret i32 [[X]]
 ;
   %x = call i32 @passthru_i32(i32 42)
   ret i32 %x
Index: test/Transforms/InstSimplify/or.ll
===================================================================
--- test/Transforms/InstSimplify/or.ll
+++ test/Transforms/InstSimplify/or.ll
@@ -98,10 +98,17 @@
   ret i8 %D
 }

+; The following two cases only get folded by InstCombine,
+; see InstCombine/or-xor.ll.
+
 ; (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
 define i8 @test11(i8 %A) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    ret i8 -1
+; CHECK-NEXT:    [[B:%.*]] = or i8 [[A:%.*]], -2
+; CHECK-NEXT:    [[C:%.*]] = xor i8 [[B]], 13
+; CHECK-NEXT:    [[D:%.*]] = or i8 [[C]], 1
+; CHECK-NEXT:    [[E:%.*]] = xor i8 [[D]], 12
+; CHECK-NEXT:    ret i8 [[E]]
 ;
   %B = or i8 %A, -2
   %C = xor i8 %B, 13
@@ -112,7 +119,12 @@

 define i8 @test11v(<2 x i8> %A) {
 ; CHECK-LABEL: @test11v(
-; CHECK-NEXT:    ret i8 -1
+; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], <i8 -2, i8 0>
+; CHECK-NEXT:    [[CV:%.*]] = xor <2 x i8> [[B]], <i8 13, i8 13>
+; CHECK-NEXT:    [[C:%.*]] = extractelement <2 x i8> [[CV]], i32 0
+; CHECK-NEXT:    [[D:%.*]] = or i8 [[C]], 1
+; CHECK-NEXT:    [[E:%.*]] = xor i8 [[D]], 12
+; CHECK-NEXT:    ret i8 [[E]]
 ;
   %B = or <2 x i8> %A, <i8 -2, i8 0>
   %CV = xor <2 x i8> %B, <i8 13, i8 13>
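
Aside: the @test11 chain really does evaluate to -1 for every input, which is the fact the removed known-bits folding proved. An exhaustive standalone check over all i8 values (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A) {
    uint8_t B = uint8_t(A) | 0xFEu; // %B = or i8 %A, -2
    uint8_t C = B ^ 13u;            // %C = xor i8 %B, 13
    uint8_t D = C | 1u;             // %D = or i8 %C, 1
    uint8_t E = D ^ 12u;            // %E = xor i8 %D, 12
    assert(E == 0xFF);              // i.e. i8 -1, for all 256 inputs
  }
}
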
Index: test/Transforms/InstSimplify/shift-knownbits.ll
===================================================================
--- test/Transforms/InstSimplify/shift-knownbits.ll
+++ test/Transforms/InstSimplify/shift-knownbits.ll
@@ -145,7 +145,8 @@
   ret i1 %shl
 }

-; Simplify count leading/trailing zeros to zero if all valid bits are shifted out.
+; The following cases only get folded by InstCombine,
+; see InstCombine/lshr.ll.

 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
@@ -154,7 +155,9 @@

 define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
-; CHECK-NEXT:    ret i32 0
+; CHECK-NEXT:    [[CT:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true)
+; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[CT]], 5
+; CHECK-NEXT:    ret i32 [[SH]]
 ;
   %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
   %sh = lshr i32 %ct, 5
@@ -163,7 +166,9 @@

 define i32 @lshr_cttz_zero_is_undef(i32 %x) {
 ; CHECK-LABEL: @lshr_cttz_zero_is_undef(
-; CHECK-NEXT:    ret i32 0
+; CHECK-NEXT:    [[CT:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
+; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[CT]], 5
+; CHECK-NEXT:    ret i32 [[SH]]
 ;
   %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
   %sh = lshr i32 %ct, 5
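
Aside: the scalar folds above were sound because ctlz/cttz of a nonzero i32 is at most 31, so the result fits in 5 bits and "lshr 5" is always zero; the i8 vector cases below are the same story with 3 bits. A standalone spot-check (plain C++, not part of the patch) using the GCC/Clang builtins, whose nonzero-argument precondition matches the zero-is-undef flag:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (uint32_t x : {1u, 2u, 0x00010000u, 0x80000000u, 0xFFFFFFFFu}) {
    assert((uint32_t(__builtin_clz(x)) >> 5) == 0); // ctlz result in [0, 31]
    assert((uint32_t(__builtin_ctz(x)) >> 5) == 0); // cttz result in [0, 31]
  }
}
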
@@ -172,7 +177,9 @@

 define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
-; CHECK-NEXT:    ret <2 x i8> zeroinitializer
+; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
+; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
@@ -181,7 +188,10 @@

 define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec(
-; CHECK-NEXT:    ret i8 0
+; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
+; CHECK-NEXT:    [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
+; CHECK-NEXT:    ret i8 [[EX]]
 ;
   %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
@@ -191,7 +201,9 @@

 define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
-; CHECK-NEXT:    ret <2 x i8> zeroinitializer
+; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
+; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
@@ -200,7 +212,10 @@

 define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec(
-; CHECK-NEXT:    ret i8 0
+; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
+; CHECK-NEXT:    [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
+; CHECK-NEXT:    ret i8 [[EX]]
 ;
   %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>