Index: llvm/trunk/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/trunk/lib/Analysis/ValueTracking.cpp +++ llvm/trunk/lib/Analysis/ValueTracking.cpp @@ -5689,7 +5689,28 @@ return; } - // TODO Handle min/max flavors. + const APInt *C; + if (!match(LHS, m_APInt(C)) && !match(RHS, m_APInt(C))) + return; + + switch (R.Flavor) { + case SPF_UMIN: + Upper = *C + 1; + break; + case SPF_UMAX: + Lower = *C; + break; + case SPF_SMIN: + Lower = APInt::getSignedMinValue(BitWidth); + Upper = *C + 1; + break; + case SPF_SMAX: + Lower = *C; + Upper = APInt::getSignedMaxValue(BitWidth) + 1; + break; + default: + break; + } } ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo) { Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -169,6 +169,12 @@ cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")); +static cl::opt<bool> EnableScalarIRPasses( + "amdgpu-scalar-ir-passes", + cl::desc("Enable scalar IR passes"), + cl::init(true), + cl::Hidden); + extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget()); @@ -670,7 +676,8 @@ if (EnableSROA) addPass(createSROAPass()); - addStraightLineScalarOptimizationPasses(); + if (EnableScalarIRPasses) + addStraightLineScalarOptimizationPasses(); if (EnableAMDGPUAliasAnalysis) { addPass(createAMDGPUAAWrapperPass()); @@ -696,7 +703,7 @@ // %1 = shl %a, 2 // // but EarlyCSE can do neither of them. - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None && EnableScalarIRPasses) addEarlyCSEOrGVNPass(); } Index: llvm/trunk/test/CodeGen/AMDGPU/med3-no-simplify.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/med3-no-simplify.ll +++ llvm/trunk/test/CodeGen/AMDGPU/med3-no-simplify.ll @@ -0,0 +1,48 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-scalar-ir-passes=false < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -amdgpu-scalar-ir-passes=false < %s | FileCheck -check-prefix=GCN -check-prefix=SICIVI -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -amdgpu-scalar-ir-passes=false < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s + +; These tests are split out from umed3.ll and smed3.ll and use the +; -amdgpu-scalar-ir-passes=false flag, because InstSimplify would constant +; fold these functions otherwise. + +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone + +; GCN-LABEL: {{^}}v_test_umed3_r_i_i_constant_order_i32: +; GCN: v_max_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}} +; GCN: v_min_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}} +define amdgpu_kernel void @v_test_umed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0 + + %icmp0 = icmp ugt i32 %a, 17 + %i0 = select i1 %icmp0, i32 %a, i32 17 + + %icmp1 = icmp ult i32 %i0, 12 + %i1 = select i1 %icmp1, i32 %i0, i32 12 + + store i32 %i1, i32 addrspace(1)* %outgep + ret void +} + +; GCN-LABEL: {{^}}v_test_smed3_r_i_i_constant_order_i32: +; GCN: v_max_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}} +; GCN: v_min_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}} +define amdgpu_kernel void @v_test_smed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 { + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep0 + + %icmp0 = icmp sgt i32 %a, 17 + %i0 = select i1 %icmp0, i32 %a, i32 17 + + %icmp1 = icmp slt i32 %i0, 12 + %i1 = select i1 %icmp1, i32 %i0, i32 12 + + store i32 %i1, i32 addrspace(1)* %outgep + ret void +} + Index: llvm/trunk/test/CodeGen/AMDGPU/smed3.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/smed3.ll +++ llvm/trunk/test/CodeGen/AMDGPU/smed3.ll @@ -42,25 +42,6 @@ ret void } -; GCN-LABEL: {{^}}v_test_smed3_r_i_i_constant_order_i32: -; GCN: v_max_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}} -; GCN: v_min_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}} -define amdgpu_kernel void @v_test_smed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid - %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - %a = load i32, i32 addrspace(1)* %gep0 - - %icmp0 = icmp sgt i32 %a, 17 - %i0 = select i1 %icmp0, i32 %a, i32 17 - - %icmp1 = icmp slt i32 %i0, 12 - %i1 = select i1 %icmp1, i32 %i0, i32 12 - - store i32 %i1, i32 addrspace(1)* %outgep - ret void -} - ; GCN-LABEL: {{^}}v_test_smed3_r_i_i_sign_mismatch_i32: ; GCN: v_max_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}} ; GCN: v_min_i32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}} Index: llvm/trunk/test/CodeGen/AMDGPU/umed3.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/umed3.ll +++ llvm/trunk/test/CodeGen/AMDGPU/umed3.ll @@ -42,25 +42,6 @@ ret void } -; GCN-LABEL: {{^}}v_test_umed3_r_i_i_constant_order_i32: -; GCN: v_max_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}} -; GCN: v_min_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}} -define amdgpu_kernel void @v_test_umed3_r_i_i_constant_order_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) #1 { - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid - %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid - %a = load i32, i32 addrspace(1)* %gep0 - - %icmp0 = icmp ugt i32 %a, 17 - %i0 = select i1 %icmp0, i32 %a, i32 17 - - %icmp1 = icmp ult i32 %i0, 12 - %i1 = select i1 %icmp1, i32 %i0, i32 12 - - store i32 %i1, i32 addrspace(1)* %outgep - ret void -} - ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_sign_mismatch_i32: ; GCN: v_max_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}} ; GCN: v_min_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}} Index: llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll +++ llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll @@ -533,6 +533,37 @@ ret i32 %res } +; Check that there is no infinite loop because of reverse cmp transformation: +; (icmp slt smax(PositiveA, B) 2) -> (icmp eq B 1) +define i32 @clamp_check_for_no_infinite_loop3(i32 %i) { +; CHECK-LABEL: @clamp_check_for_no_infinite_loop3( +; CHECK-NEXT: [[I2:%.*]] = icmp sgt i32 [[I:%.*]], 1 +; CHECK-NEXT: [[I3:%.*]] = select i1 [[I2]], i32 [[I]], i32 1 +; CHECK-NEXT: br i1 true, label [[TRUELABEL:%.*]], label [[FALSELABEL:%.*]] +; CHECK: truelabel: +; CHECK-NEXT: [[I5:%.*]] = icmp slt i32 [[I3]], 2 +; CHECK-NEXT: [[I6:%.*]] = select i1 [[I5]], i32 [[I3]], i32 2 +; CHECK-NEXT: [[I7:%.*]] = shl nuw nsw i32 [[I6]], 2 +; CHECK-NEXT: ret i32 [[I7]] +; CHECK: falselabel: +; CHECK-NEXT: ret i32 0 +; + + %i2 = icmp sgt i32 %i, 1 + %i3 = select i1 %i2, i32 %i, i32 1 + %i4 = icmp sgt i32 %i3, 0 + br i1 %i4, label %truelabel, label %falselabel + +truelabel: ; %i<=1, %i3>0 + %i5 = icmp slt i32 %i3, 2 + %i6 = select i1 %i5, i32 %i3, i32 2 + %i7 = shl nuw nsw i32 %i6, 2 + ret i32 %i7 + +falselabel: + ret i32 0 +} + ; The next 3 min tests should canonicalize to the same form...and not infinite loop. define double @PR31751_umin1(i32 %x) { Index: llvm/trunk/test/Transforms/InstCombine/sub.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/sub.ll +++ llvm/trunk/test/Transforms/InstCombine/sub.ll @@ -1213,7 +1213,7 @@ ; CHECK-LABEL: @test66( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], -101 ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -101 -; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP2]], 1 +; CHECK-NEXT: [[RES:%.*]] = add nuw i32 [[TMP2]], 1 ; CHECK-NEXT: ret i32 [[RES]] ; %1 = xor i32 %x, -1 Index: llvm/trunk/test/Transforms/InstSimplify/cmp_of_min_max.ll =================================================================== --- llvm/trunk/test/Transforms/InstSimplify/cmp_of_min_max.ll +++ llvm/trunk/test/Transforms/InstSimplify/cmp_of_min_max.ll @@ -3,10 +3,7 @@ define i1 @test_umax1(i32 %n) { ; CHECK-LABEL: @test_umax1( -; CHECK-NEXT: [[C1:%.*]] = icmp ugt i32 [[N:%.*]], 10 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C1]], i32 [[N]], i32 10 -; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[S]], 9 -; CHECK-NEXT: ret i1 [[C2]] +; CHECK-NEXT: ret i1 true ; %c1 = icmp ugt i32 %n, 10 %s = select i1 %c1, i32 %n, i32 10 @@ -40,10 +37,7 @@ define i1 @test_umin1(i32 %n) { ; CHECK-LABEL: @test_umin1( -; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[N:%.*]], 10 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C1]], i32 [[N]], i32 10 -; CHECK-NEXT: [[C2:%.*]] = icmp ult i32 [[S]], 11 -; CHECK-NEXT: ret i1 [[C2]] +; CHECK-NEXT: ret i1 true ; %c1 = icmp ult i32 %n, 10 %s = select i1 %c1, i32 %n, i32 10 @@ -77,10 +71,7 @@ define i1 @test_smax1(i32 %n) { ; CHECK-LABEL: @test_smax1( -; CHECK-NEXT: [[C1:%.*]] = icmp sgt i32 [[N:%.*]], -10 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C1]], i32 [[N]], i32 -10 -; CHECK-NEXT: [[C2:%.*]] = icmp sgt i32 [[S]], -11 -; CHECK-NEXT: ret i1 [[C2]] +; CHECK-NEXT: ret i1 true ; %c1 = icmp sgt i32 %n, -10 %s = select i1 %c1, i32 %n, i32 -10 @@ -114,10 +105,7 @@ define i1 @test_smin1(i32 %n) { ; CHECK-LABEL: @test_smin1( -; CHECK-NEXT: [[C1:%.*]] = icmp slt i32 [[N:%.*]], 10 -; CHECK-NEXT: [[S:%.*]] = select i1 [[C1]], i32 [[N]], i32 10 -; CHECK-NEXT: [[C2:%.*]] = icmp slt i32 [[S]], 11 -; CHECK-NEXT: ret i1 [[C2]] +; CHECK-NEXT: ret i1 true ; %c1 = icmp slt i32 %n, 10 %s = select i1 %c1, i32 %n, i32 10