Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -908,6 +908,7 @@ OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level))); OptimizePM.addPass(WarnMissedTransformationsPass()); OptimizePM.addPass(InstCombinePass()); + OptimizePM.addPass(ReassociatePass()); OptimizePM.addPass(RequireAnalysisPass()); OptimizePM.addPass(createFunctionToLoopPassAdaptor( LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -727,6 +727,7 @@ if (!DisableUnrollLoops) { // LoopUnroll may generate some redundency to cleanup. addInstructionCombiningPass(MPM); + MPM.add(createReassociatePass()); // Runtime unrolling will introduce runtime check in loop prologue. If the // unrolled loop is a inner loop, then the prologue will be inside the Index: test/CodeGen/AMDGPU/simplify-libcalls.ll =================================================================== --- test/CodeGen/AMDGPU/simplify-libcalls.ll +++ test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -298,9 +298,9 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 -; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx2, %tmp +; GCN: %__powx22 = fmul fast float %[[r0]], %__powx21 +; GCN: %__powprod3 = fmul fast float %__powx22, %__powx21 define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -313,9 +313,9 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 -; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx2, %tmp +; GCN: %__powx22 = fmul fast float %[[r0]], %__powx21 +; GCN: %__powprod3 = fmul fast float %__powx22, %__powx21 define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -330,9 +330,9 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 -; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx2, %tmp +; GCN: %__powx22 = fmul fast float %[[r0]], %__powx21 +; GCN: %__powprod3 = fmul fast float %__powx22, %__powx21 define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -353,7 +353,7 @@ ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648 ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 -; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] +; GCN-PRELINK: %[[r2:.*]] = or i32 %[[r1]], %__pow_sign ; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* ; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) { @@ -396,7 +396,7 @@ ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]] ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 -; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] +; GCN-PRELINK: %[[r2:.*]] = or i32 %[[r1]], %__pow_sign ; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* ; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) { Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -248,6 +248,7 @@ ; CHECK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-O-NEXT: Running pass: WarnMissedTransformationsPass ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O-NEXT: Running pass: ReassociatePass ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis ; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass ; CHECK-O-NEXT: Starting llvm::Function pass manager run. Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -222,6 +222,7 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass ; CHECK-POSTLINK-O-NEXT: Running pass: WarnMissedTransformationsPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass +; CHECK-POSTLINK-O-NEXT: Running pass: ReassociatePass ; CHECK-POSTLINK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis ; CHECK-POSTLINK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LICMPass ; CHECK-POSTLINK-O-NEXT: Starting llvm::Function pass manager run Index: test/Other/opt-O2-pipeline.ll =================================================================== --- test/Other/opt-O2-pipeline.ll +++ test/Other/opt-O2-pipeline.ll @@ -246,9 +246,12 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Reassociate expressions ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion Index: test/Other/opt-O3-pipeline.ll =================================================================== --- test/Other/opt-O3-pipeline.ll +++ test/Other/opt-O3-pipeline.ll @@ -251,9 +251,12 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Reassociate expressions ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion Index: test/Other/opt-Os-pipeline.ll =================================================================== --- test/Other/opt-Os-pipeline.ll +++ test/Other/opt-Os-pipeline.ll @@ -233,9 +233,12 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Reassociate expressions ; CHECK-NEXT: Canonicalize natural loops ; CHECK-NEXT: LCSSA Verifier ; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion Index: test/Transforms/PhaseOrdering/reassociate-after-unroll.ll =================================================================== --- test/Transforms/PhaseOrdering/reassociate-after-unroll.ll +++ test/Transforms/PhaseOrdering/reassociate-after-unroll.ll @@ -30,7 +30,7 @@ ; CHECK-NEXT: [[G_06_EPIL:%.*]] = phi i64 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[K_05_EPIL:%.*]] = phi i64 [ [[AND_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[K_05_UNR]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] ; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[AND_EPIL]] = and i64 [[CONV]], [[K_05_EPIL]] +; CHECK-NEXT: [[AND_EPIL]] = and i64 [[K_05_EPIL]], [[CONV]] ; CHECK-NEXT: [[ADD_EPIL]] = add i64 [[AND_EPIL]], [[G_06_EPIL]] ; CHECK-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 ; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 @@ -42,15 +42,9 @@ ; CHECK-NEXT: [[G_06:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7]], [[FOR_BODY]] ] ; CHECK-NEXT: [[K_05:%.*]] = phi i64 [ 1, [[FOR_BODY_LR_PH_NEW]] ], [ [[AND]], [[FOR_BODY]] ] ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[AND]] = and i64 [[CONV]], [[K_05]] -; CHECK-NEXT: [[ADD:%.*]] = add i64 [[AND]], [[G_06]] -; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[AND]], [[ADD]] -; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[AND]], [[ADD_1]] -; CHECK-NEXT: [[ADD_3:%.*]] = add i64 [[AND]], [[ADD_2]] -; CHECK-NEXT: [[ADD_4:%.*]] = add i64 [[AND]], [[ADD_3]] -; CHECK-NEXT: [[ADD_5:%.*]] = add i64 [[AND]], [[ADD_4]] -; CHECK-NEXT: [[ADD_6:%.*]] = add i64 [[AND]], [[ADD_5]] -; CHECK-NEXT: [[ADD_7]] = add i64 [[AND]], [[ADD_6]] +; CHECK-NEXT: [[AND]] = and i64 [[K_05]], [[CONV]] +; CHECK-NEXT: [[FACTOR:%.*]] = mul i64 [[AND]], 8 +; CHECK-NEXT: [[ADD_7]] = add i64 [[FACTOR]], [[G_06]] ; CHECK-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 ; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 ; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY]] @@ -78,7 +72,7 @@ ; NPM-NEXT: [[G_06_EPIL:%.*]] = phi i64 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[ADD_LCSSA_PH]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] ; NPM-NEXT: [[K_05_EPIL:%.*]] = phi i64 [ [[AND_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[K_05_UNR]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] ; NPM-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]] ] -; NPM-NEXT: [[AND_EPIL]] = and i64 [[CONV]], [[K_05_EPIL]] +; NPM-NEXT: [[AND_EPIL]] = and i64 [[K_05_EPIL]], [[CONV]] ; NPM-NEXT: [[ADD_EPIL]] = add i64 [[AND_EPIL]], [[G_06_EPIL]] ; NPM-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 ; NPM-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 @@ -90,19 +84,13 @@ ; NPM-NEXT: [[G_06:%.*]] = phi i64 [ undef, [[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ] ; NPM-NEXT: [[AND_PHI]] = phi i64 [ [[AND_0]], [[FOR_BODY_LR_PH_NEW]] ], [ [[AND_1:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ] ; NPM-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ] -; NPM-NEXT: [[ADD:%.*]] = add i64 [[AND_PHI]], [[G_06]] -; NPM-NEXT: [[ADD_1:%.*]] = add i64 [[AND_PHI]], [[ADD]] -; NPM-NEXT: [[ADD_2:%.*]] = add i64 [[AND_PHI]], [[ADD_1]] -; NPM-NEXT: [[ADD_3:%.*]] = add i64 [[AND_PHI]], [[ADD_2]] -; NPM-NEXT: [[ADD_4:%.*]] = add i64 [[AND_PHI]], [[ADD_3]] -; NPM-NEXT: [[ADD_5:%.*]] = add i64 [[AND_PHI]], [[ADD_4]] -; NPM-NEXT: [[ADD_6:%.*]] = add i64 [[AND_PHI]], [[ADD_5]] -; NPM-NEXT: [[ADD_7]] = add i64 [[AND_PHI]], [[ADD_6]] +; NPM-NEXT: [[FACTOR:%.*]] = mul i64 [[AND_PHI]], 8 +; NPM-NEXT: [[ADD_7]] = add i64 [[FACTOR]], [[G_06]] ; NPM-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 ; NPM-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 ; NPM-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_COND_CLEANUP_LOOPEXIT_UNR_LCSSA]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ; NPM: for.body.for.body_crit_edge: -; NPM-NEXT: [[AND_1]] = and i64 [[CONV]], [[AND_PHI]] +; NPM-NEXT: [[AND_1]] = and i64 [[AND_PHI]], [[CONV]] ; NPM-NEXT: br label [[FOR_BODY]] ; entry: