Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -931,6 +931,7 @@ // And finally clean up LCSSA form before generating code. OptimizePM.addPass(InstSimplifyPass()); + OptimizePM.addPass(ReassociatePass()); // This hoists/decomposes div/rem ops. It should run after other sink/hoist // passes to avoid re-sinking, but before SimplifyCFG because it can allow Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -733,6 +733,7 @@ // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap)); + MPM.add(createReassociatePass()); } MPM.add(createWarnMissedTransformationsPass()); Index: test/CodeGen/AMDGPU/simplify-libcalls.ll =================================================================== --- test/CodeGen/AMDGPU/simplify-libcalls.ll +++ test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -298,9 +298,9 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 -; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx2, %tmp +; GCN: %__powx22 = fmul fast float %[[r0]], %__powx21 +; GCN: %__powprod3 = fmul fast float %__powx22, %__powx21 define amdgpu_kernel void @test_pow_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -313,9 +313,9 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 -; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx2, %tmp +; GCN: %__powx22 = fmul fast float %[[r0]], %__powx21 +; GCN: %__powprod3 = fmul fast float %__powx22, %__powx21 define amdgpu_kernel void @test_powr_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -330,9 +330,9 @@ ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c ; GCN: %__powx2 = fmul fast float %tmp, %tmp ; GCN: %__powx21 = fmul fast float %__powx2, %__powx2 -; GCN: %__powx22 = fmul fast float %__powx2, %tmp -; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21 -; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22 +; GCN: %[[r0:.*]] = fmul fast float %__powx2, %tmp +; GCN: %__powx22 = fmul fast float %[[r0]], %__powx21 +; GCN: %__powprod3 = fmul fast float %__powx22, %__powx21 define amdgpu_kernel void @test_pown_c(float addrspace(1)* nocapture %a) { entry: %arrayidx = getelementptr inbounds float, float addrspace(1)* %a, i64 1 @@ -353,7 +353,7 @@ ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648 ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 -; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] +; GCN-PRELINK: %[[r2:.*]] = or i32 %[[r1]], %__pow_sign ; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* ; 
GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 define amdgpu_kernel void @test_pow(float addrspace(1)* nocapture %a) { @@ -396,7 +396,7 @@ ; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32 ; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]] ; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32 -; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]] +; GCN-PRELINK: %[[r2:.*]] = or i32 %[[r1]], %__pow_sign ; GCN-PRELINK: %[[r3:.*]] = bitcast float addrspace(1)* %a to i32 addrspace(1)* ; GCN-PRELINK: store i32 %[[r2]], i32 addrspace(1)* %[[r3]], align 4 define amdgpu_kernel void @test_pown(float addrspace(1)* nocapture %a) { Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -257,6 +257,7 @@ ; CHECK-O-NEXT: Running pass: AlignmentFromAssumptionsPass ; CHECK-O-NEXT: Running pass: LoopSinkPass ; CHECK-O-NEXT: Running pass: InstSimplifyPass +; CHECK-O-NEXT: Running pass: ReassociatePass on foo ; CHECK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: SpeculateAroundPHIsPass Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -231,6 +231,7 @@ ; CHECK-POSTLINK-O-NEXT: Running pass: AlignmentFromAssumptionsPass ; CHECK-POSTLINK-O-NEXT: Running pass: LoopSinkPass ; CHECK-POSTLINK-O-NEXT: Running pass: InstSimplifyPass +; CHECK-POSTLINK-O-NEXT: Running pass: ReassociatePass ; CHECK-POSTLINK-O-NEXT: Running pass: DivRemPairsPass ; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-POSTLINK-O-NEXT: Running pass: SpeculateAroundPHIsPass Index: test/Other/opt-O2-pipeline.ll =================================================================== --- test/Other/opt-O2-pipeline.ll +++ test/Other/opt-O2-pipeline.ll @@ -252,10 +252,12 @@ ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion +; CHECK-NEXT: Reassociate expressions ; CHECK-NEXT: Lazy Branch Probability Analysis ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Warn about non-applied transformations +; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Alignment from assumptions ; CHECK-NEXT: Strip Unused Function Prototypes ; CHECK-NEXT: Dead Global Elimination Index: test/Other/opt-O3-pipeline.ll =================================================================== --- test/Other/opt-O3-pipeline.ll +++ test/Other/opt-O3-pipeline.ll @@ -257,10 +257,12 @@ ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion +; CHECK-NEXT: Reassociate expressions ; CHECK-NEXT: Lazy Branch Probability Analysis ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Warn about non-applied transformations +; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Alignment from assumptions ; CHECK-NEXT: Strip Unused Function Prototypes ; CHECK-NEXT: Dead Global Elimination Index: test/Other/opt-Os-pipeline.ll =================================================================== --- test/Other/opt-Os-pipeline.ll +++ test/Other/opt-Os-pipeline.ll @@ -239,10 +239,12 @@ ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Loop Invariant Code Motion +; CHECK-NEXT: 
Reassociate expressions ; CHECK-NEXT: Lazy Branch Probability Analysis ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Warn about non-applied transformations +; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Alignment from assumptions ; CHECK-NEXT: Strip Unused Function Prototypes ; CHECK-NEXT: Dead Global Elimination Index: test/Transforms/LoopVectorize/X86/masked_load_store.ll =================================================================== --- test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -26,11 +26,11 @@ ; AVX1-NEXT: [[SCEVGEP14:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 10000 ; AVX1-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP11]], [[A]] ; AVX1-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[TRIGGER]] -; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX1-NEXT: [[BOUND016:%.*]] = icmp ugt i32* [[SCEVGEP14]], [[A]] ; AVX1-NEXT: [[BOUND117:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]] -; AVX1-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX1-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -100,11 +100,11 @@ ; AVX2-NEXT: [[SCEVGEP14:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 10000 ; AVX2-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP11]], [[A]] ; AVX2-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[TRIGGER]] -; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX2-NEXT: [[BOUND016:%.*]] = icmp ugt i32* [[SCEVGEP14]], [[A]] ; AVX2-NEXT: [[BOUND117:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]] -; AVX2-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX2-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -269,11 +269,11 @@ ; AVX512-NEXT: [[SCEVGEP14:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 10000 ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP11]], [[A]] ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[TRIGGER]] -; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX512-NEXT: [[BOUND016:%.*]] = icmp ugt i32* [[SCEVGEP14]], [[A]] ; AVX512-NEXT: [[BOUND117:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]] -; AVX512-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX512-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX512-NEXT: 
br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -499,11 +499,11 @@ ; AVX1-NEXT: [[SCEVGEP14:%.*]] = getelementptr i32, i32 addrspace(1)* [[B:%.*]], i64 10000 ; AVX1-NEXT: [[BOUND0:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP11]], [[A]] ; AVX1-NEXT: [[BOUND1:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP]], [[TRIGGER]] -; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX1-NEXT: [[BOUND016:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP14]], [[A]] ; AVX1-NEXT: [[BOUND117:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP]], [[B]] -; AVX1-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX1-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -573,11 +573,11 @@ ; AVX2-NEXT: [[SCEVGEP14:%.*]] = getelementptr i32, i32 addrspace(1)* [[B:%.*]], i64 10000 ; AVX2-NEXT: [[BOUND0:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP11]], [[A]] ; AVX2-NEXT: [[BOUND1:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP]], [[TRIGGER]] -; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX2-NEXT: [[BOUND016:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP14]], [[A]] ; AVX2-NEXT: [[BOUND117:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP]], [[B]] -; AVX2-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX2-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -742,11 +742,11 @@ ; AVX512-NEXT: [[SCEVGEP14:%.*]] = getelementptr i32, i32 addrspace(1)* [[B:%.*]], i64 10000 ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP11]], [[A]] ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP]], [[TRIGGER]] -; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX512-NEXT: [[BOUND016:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP14]], [[A]] ; AVX512-NEXT: [[BOUND117:%.*]] = icmp ugt i32 addrspace(1)* [[SCEVGEP]], [[B]] -; AVX512-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX512-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ 
-983,11 +983,11 @@ ; AVX1-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[TMP0]], [[A]] ; AVX1-NEXT: [[TMP1:%.*]] = bitcast float* [[SCEVGEP]] to i32* ; AVX1-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX1-NEXT: [[BOUND016:%.*]] = icmp ugt float* [[SCEVGEP14]], [[A]] ; AVX1-NEXT: [[BOUND117:%.*]] = icmp ugt float* [[SCEVGEP]], [[B]] -; AVX1-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX1-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -1023,10 +1023,10 @@ ; AVX1-NEXT: [[TMP23:%.*]] = sitofp <8 x i32> [[WIDE_LOAD22]] to <8 x float> ; AVX1-NEXT: [[TMP24:%.*]] = sitofp <8 x i32> [[WIDE_LOAD23]] to <8 x float> ; AVX1-NEXT: [[TMP25:%.*]] = sitofp <8 x i32> [[WIDE_LOAD24]] to <8 x float> -; AVX1-NEXT: [[TMP26:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP22]] -; AVX1-NEXT: [[TMP27:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD25]], [[TMP23]] -; AVX1-NEXT: [[TMP28:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD26]], [[TMP24]] -; AVX1-NEXT: [[TMP29:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD27]], [[TMP25]] +; AVX1-NEXT: [[TMP26:%.*]] = fadd <8 x float> [[TMP22]], [[WIDE_MASKED_LOAD]] +; AVX1-NEXT: [[TMP27:%.*]] = fadd <8 x float> [[TMP23]], [[WIDE_MASKED_LOAD25]] +; AVX1-NEXT: [[TMP28:%.*]] = fadd <8 x float> [[TMP24]], [[WIDE_MASKED_LOAD26]] +; AVX1-NEXT: [[TMP29:%.*]] = fadd <8 x float> [[TMP25]], [[WIDE_MASKED_LOAD27]] ; AVX1-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]] ; AVX1-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <8 x float>* ; AVX1-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP26]], <8 x float>* [[TMP31]], i32 4, <8 x i1> [[TMP10]]), !alias.scope !26, !noalias !28 @@ -1055,7 +1055,7 @@ ; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] ; AVX1-NEXT: [[TMP40:%.*]] = load float, float* [[ARRAYIDX3]], align 4 ; AVX1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP39]] to float -; AVX1-NEXT: [[ADD:%.*]] = fadd float [[TMP40]], [[CONV]] +; AVX1-NEXT: [[ADD:%.*]] = fadd float [[CONV]], [[TMP40]] ; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; AVX1-NEXT: store float [[ADD]], float* [[ARRAYIDX7]], align 4 ; AVX1-NEXT: br label [[FOR_INC]] @@ -1071,7 +1071,7 @@ ; AVX1-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_NEXT]] ; AVX1-NEXT: [[TMP42:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4 ; AVX1-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP41]] to float -; AVX1-NEXT: [[ADD_1:%.*]] = fadd float [[TMP42]], [[CONV_1]] +; AVX1-NEXT: [[ADD_1:%.*]] = fadd float [[CONV_1]], [[TMP42]] ; AVX1-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX1-NEXT: store float [[ADD_1]], float* [[ARRAYIDX7_1]], align 4 ; AVX1-NEXT: br label [[FOR_INC_1]] @@ -1089,11 +1089,11 @@ ; AVX2-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[TMP0]], [[A]] ; AVX2-NEXT: [[TMP1:%.*]] = bitcast float* [[SCEVGEP]] to i32* ; AVX2-NEXT: 
[[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX2-NEXT: [[BOUND016:%.*]] = icmp ugt float* [[SCEVGEP14]], [[A]] ; AVX2-NEXT: [[BOUND117:%.*]] = icmp ugt float* [[SCEVGEP]], [[B]] -; AVX2-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX2-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -1129,10 +1129,10 @@ ; AVX2-NEXT: [[TMP23:%.*]] = sitofp <8 x i32> [[WIDE_LOAD22]] to <8 x float> ; AVX2-NEXT: [[TMP24:%.*]] = sitofp <8 x i32> [[WIDE_LOAD23]] to <8 x float> ; AVX2-NEXT: [[TMP25:%.*]] = sitofp <8 x i32> [[WIDE_LOAD24]] to <8 x float> -; AVX2-NEXT: [[TMP26:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD]], [[TMP22]] -; AVX2-NEXT: [[TMP27:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD25]], [[TMP23]] -; AVX2-NEXT: [[TMP28:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD26]], [[TMP24]] -; AVX2-NEXT: [[TMP29:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD27]], [[TMP25]] +; AVX2-NEXT: [[TMP26:%.*]] = fadd <8 x float> [[TMP22]], [[WIDE_MASKED_LOAD]] +; AVX2-NEXT: [[TMP27:%.*]] = fadd <8 x float> [[TMP23]], [[WIDE_MASKED_LOAD25]] +; AVX2-NEXT: [[TMP28:%.*]] = fadd <8 x float> [[TMP24]], [[WIDE_MASKED_LOAD26]] +; AVX2-NEXT: [[TMP29:%.*]] = fadd <8 x float> [[TMP25]], [[WIDE_MASKED_LOAD27]] ; AVX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]] ; AVX2-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <8 x float>* ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[TMP26]], <8 x float>* [[TMP31]], i32 4, <8 x i1> [[TMP10]]), !alias.scope !26, !noalias !28 @@ -1161,7 +1161,7 @@ ; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] ; AVX2-NEXT: [[TMP40:%.*]] = load float, float* [[ARRAYIDX3]], align 4 ; AVX2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP39]] to float -; AVX2-NEXT: [[ADD:%.*]] = fadd float [[TMP40]], [[CONV]] +; AVX2-NEXT: [[ADD:%.*]] = fadd float [[CONV]], [[TMP40]] ; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; AVX2-NEXT: store float [[ADD]], float* [[ARRAYIDX7]], align 4 ; AVX2-NEXT: br label [[FOR_INC]] @@ -1177,7 +1177,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_NEXT]] ; AVX2-NEXT: [[TMP42:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4 ; AVX2-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP41]] to float -; AVX2-NEXT: [[ADD_1:%.*]] = fadd float [[TMP42]], [[CONV_1]] +; AVX2-NEXT: [[ADD_1:%.*]] = fadd float [[CONV_1]], [[TMP42]] ; AVX2-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX2-NEXT: store float [[ADD_1]], float* [[ARRAYIDX7_1]], align 4 ; AVX2-NEXT: br label [[FOR_INC_1]] @@ -1191,7 +1191,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_NEXT_1]] ; AVX2-NEXT: [[TMP44:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4 ; AVX2-NEXT: [[CONV_2:%.*]] = sitofp i32 [[TMP43]] to float -; AVX2-NEXT: [[ADD_2:%.*]] = fadd float [[TMP44]], [[CONV_2]] 
+; AVX2-NEXT: [[ADD_2:%.*]] = fadd float [[CONV_2]], [[TMP44]] ; AVX2-NEXT: [[ARRAYIDX7_2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_1]] ; AVX2-NEXT: store float [[ADD_2]], float* [[ARRAYIDX7_2]], align 4 ; AVX2-NEXT: br label [[FOR_INC_2]] @@ -1205,7 +1205,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_NEXT_2]] ; AVX2-NEXT: [[TMP46:%.*]] = load float, float* [[ARRAYIDX3_3]], align 4 ; AVX2-NEXT: [[CONV_3:%.*]] = sitofp i32 [[TMP45]] to float -; AVX2-NEXT: [[ADD_3:%.*]] = fadd float [[TMP46]], [[CONV_3]] +; AVX2-NEXT: [[ADD_3:%.*]] = fadd float [[CONV_3]], [[TMP46]] ; AVX2-NEXT: [[ARRAYIDX7_3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_2]] ; AVX2-NEXT: store float [[ADD_3]], float* [[ARRAYIDX7_3]], align 4 ; AVX2-NEXT: br label [[FOR_INC_3]] @@ -1223,11 +1223,11 @@ ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[TMP0]], [[A]] ; AVX512-NEXT: [[TMP1:%.*]] = bitcast float* [[SCEVGEP]] to i32* ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX512-NEXT: [[BOUND016:%.*]] = icmp ugt float* [[SCEVGEP14]], [[A]] ; AVX512-NEXT: [[BOUND117:%.*]] = icmp ugt float* [[SCEVGEP]], [[B]] -; AVX512-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX512-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -1263,10 +1263,10 @@ ; AVX512-NEXT: [[TMP23:%.*]] = sitofp <16 x i32> [[WIDE_LOAD22]] to <16 x float> ; AVX512-NEXT: [[TMP24:%.*]] = sitofp <16 x i32> [[WIDE_LOAD23]] to <16 x float> ; AVX512-NEXT: [[TMP25:%.*]] = sitofp <16 x i32> [[WIDE_LOAD24]] to <16 x float> -; AVX512-NEXT: [[TMP26:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD]], [[TMP22]] -; AVX512-NEXT: [[TMP27:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD25]], [[TMP23]] -; AVX512-NEXT: [[TMP28:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD26]], [[TMP24]] -; AVX512-NEXT: [[TMP29:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD27]], [[TMP25]] +; AVX512-NEXT: [[TMP26:%.*]] = fadd <16 x float> [[TMP22]], [[WIDE_MASKED_LOAD]] +; AVX512-NEXT: [[TMP27:%.*]] = fadd <16 x float> [[TMP23]], [[WIDE_MASKED_LOAD25]] +; AVX512-NEXT: [[TMP28:%.*]] = fadd <16 x float> [[TMP24]], [[WIDE_MASKED_LOAD26]] +; AVX512-NEXT: [[TMP29:%.*]] = fadd <16 x float> [[TMP25]], [[WIDE_MASKED_LOAD27]] ; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]] ; AVX512-NEXT: [[TMP31:%.*]] = bitcast float* [[TMP30]] to <16 x float>* ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0v16f32(<16 x float> [[TMP26]], <16 x float>* [[TMP31]], i32 4, <16 x i1> [[TMP10]]), !alias.scope !26, !noalias !28 @@ -1295,7 +1295,7 @@ ; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] ; AVX512-NEXT: [[TMP40:%.*]] = load float, float* [[ARRAYIDX3]], align 4 ; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP39]] to float -; AVX512-NEXT: [[ADD:%.*]] = fadd float [[TMP40]], [[CONV]] +; AVX512-NEXT: [[ADD:%.*]] = fadd float [[CONV]], 
[[TMP40]] ; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] ; AVX512-NEXT: store float [[ADD]], float* [[ARRAYIDX7]], align 4 ; AVX512-NEXT: br label [[FOR_INC]] @@ -1311,7 +1311,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_NEXT]] ; AVX512-NEXT: [[TMP42:%.*]] = load float, float* [[ARRAYIDX3_1]], align 4 ; AVX512-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP41]] to float -; AVX512-NEXT: [[ADD_1:%.*]] = fadd float [[TMP42]], [[CONV_1]] +; AVX512-NEXT: [[ADD_1:%.*]] = fadd float [[CONV_1]], [[TMP42]] ; AVX512-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX512-NEXT: store float [[ADD_1]], float* [[ARRAYIDX7_1]], align 4 ; AVX512-NEXT: br label [[FOR_INC_1]] @@ -1325,7 +1325,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_NEXT_1]] ; AVX512-NEXT: [[TMP44:%.*]] = load float, float* [[ARRAYIDX3_2]], align 4 ; AVX512-NEXT: [[CONV_2:%.*]] = sitofp i32 [[TMP43]] to float -; AVX512-NEXT: [[ADD_2:%.*]] = fadd float [[TMP44]], [[CONV_2]] +; AVX512-NEXT: [[ADD_2:%.*]] = fadd float [[CONV_2]], [[TMP44]] ; AVX512-NEXT: [[ARRAYIDX7_2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_1]] ; AVX512-NEXT: store float [[ADD_2]], float* [[ARRAYIDX7_2]], align 4 ; AVX512-NEXT: br label [[FOR_INC_2]] @@ -1339,7 +1339,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV_NEXT_2]] ; AVX512-NEXT: [[TMP46:%.*]] = load float, float* [[ARRAYIDX3_3]], align 4 ; AVX512-NEXT: [[CONV_3:%.*]] = sitofp i32 [[TMP45]] to float -; AVX512-NEXT: [[ADD_3:%.*]] = fadd float [[TMP46]], [[CONV_3]] +; AVX512-NEXT: [[ADD_3:%.*]] = fadd float [[CONV_3]], [[TMP46]] ; AVX512-NEXT: [[ARRAYIDX7_3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV_NEXT_2]] ; AVX512-NEXT: store float [[ADD_3]], float* [[ARRAYIDX7_3]], align 4 ; AVX512-NEXT: br label [[FOR_INC_3]] @@ -1428,11 +1428,11 @@ ; AVX1-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[TMP0]], [[A]] ; AVX1-NEXT: [[TMP1:%.*]] = bitcast double* [[SCEVGEP]] to i32* ; AVX1-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX1-NEXT: [[BOUND016:%.*]] = icmp ugt double* [[SCEVGEP14]], [[A]] ; AVX1-NEXT: [[BOUND117:%.*]] = icmp ugt double* [[SCEVGEP]], [[B]] -; AVX1-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX1-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -1468,10 +1468,10 @@ ; AVX1-NEXT: [[TMP23:%.*]] = sitofp <4 x i32> [[WIDE_LOAD22]] to <4 x double> ; AVX1-NEXT: [[TMP24:%.*]] = sitofp <4 x i32> [[WIDE_LOAD23]] to <4 x double> ; AVX1-NEXT: [[TMP25:%.*]] = sitofp <4 x i32> [[WIDE_LOAD24]] to <4 x double> -; AVX1-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], [[TMP22]] -; AVX1-NEXT: [[TMP27:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD25]], [[TMP23]] -; AVX1-NEXT: [[TMP28:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD26]], 
[[TMP24]] -; AVX1-NEXT: [[TMP29:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD27]], [[TMP25]] +; AVX1-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[TMP22]], [[WIDE_MASKED_LOAD]] +; AVX1-NEXT: [[TMP27:%.*]] = fadd <4 x double> [[TMP23]], [[WIDE_MASKED_LOAD25]] +; AVX1-NEXT: [[TMP28:%.*]] = fadd <4 x double> [[TMP24]], [[WIDE_MASKED_LOAD26]] +; AVX1-NEXT: [[TMP29:%.*]] = fadd <4 x double> [[TMP25]], [[WIDE_MASKED_LOAD27]] ; AVX1-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDEX]] ; AVX1-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <4 x double>* ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP26]], <4 x double>* [[TMP31]], i32 8, <4 x i1> [[TMP10]]), !alias.scope !36, !noalias !38 @@ -1497,7 +1497,7 @@ ; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV]] ; AVX1-NEXT: [[TMP40:%.*]] = load double, double* [[ARRAYIDX3]], align 8 ; AVX1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP39]] to double -; AVX1-NEXT: [[ADD:%.*]] = fadd double [[TMP40]], [[CONV]] +; AVX1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP40]] ; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] ; AVX1-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 ; AVX1-NEXT: br label [[FOR_INC]] @@ -1513,7 +1513,7 @@ ; AVX1-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV_NEXT]] ; AVX1-NEXT: [[TMP42:%.*]] = load double, double* [[ARRAYIDX3_1]], align 8 ; AVX1-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP41]] to double -; AVX1-NEXT: [[ADD_1:%.*]] = fadd double [[TMP42]], [[CONV_1]] +; AVX1-NEXT: [[ADD_1:%.*]] = fadd double [[CONV_1]], [[TMP42]] ; AVX1-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX1-NEXT: store double [[ADD_1]], double* [[ARRAYIDX7_1]], align 8 ; AVX1-NEXT: br label [[FOR_INC_1]] @@ -1531,11 +1531,11 @@ ; AVX2-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[TMP0]], [[A]] ; AVX2-NEXT: [[TMP1:%.*]] = bitcast double* [[SCEVGEP]] to i32* ; AVX2-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX2-NEXT: [[BOUND016:%.*]] = icmp ugt double* [[SCEVGEP14]], [[A]] ; AVX2-NEXT: [[BOUND117:%.*]] = icmp ugt double* [[SCEVGEP]], [[B]] -; AVX2-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND016]], [[BOUND117]] -; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX2-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -1571,10 +1571,10 @@ ; AVX2-NEXT: [[TMP23:%.*]] = sitofp <4 x i32> [[WIDE_LOAD22]] to <4 x double> ; AVX2-NEXT: [[TMP24:%.*]] = sitofp <4 x i32> [[WIDE_LOAD23]] to <4 x double> ; AVX2-NEXT: [[TMP25:%.*]] = sitofp <4 x i32> [[WIDE_LOAD24]] to <4 x double> -; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], [[TMP22]] -; AVX2-NEXT: [[TMP27:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD25]], [[TMP23]] -; AVX2-NEXT: [[TMP28:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD26]], [[TMP24]] -; AVX2-NEXT: [[TMP29:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD27]], [[TMP25]] +; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 
x double> [[TMP22]], [[WIDE_MASKED_LOAD]] +; AVX2-NEXT: [[TMP27:%.*]] = fadd <4 x double> [[TMP23]], [[WIDE_MASKED_LOAD25]] +; AVX2-NEXT: [[TMP28:%.*]] = fadd <4 x double> [[TMP24]], [[WIDE_MASKED_LOAD26]] +; AVX2-NEXT: [[TMP29:%.*]] = fadd <4 x double> [[TMP25]], [[WIDE_MASKED_LOAD27]] ; AVX2-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDEX]] ; AVX2-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <4 x double>* ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[TMP26]], <4 x double>* [[TMP31]], i32 8, <4 x i1> [[TMP10]]), !alias.scope !36, !noalias !38 @@ -1600,7 +1600,7 @@ ; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV]] ; AVX2-NEXT: [[TMP40:%.*]] = load double, double* [[ARRAYIDX3]], align 8 ; AVX2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP39]] to double -; AVX2-NEXT: [[ADD:%.*]] = fadd double [[TMP40]], [[CONV]] +; AVX2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP40]] ; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] ; AVX2-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 ; AVX2-NEXT: br label [[FOR_INC]] @@ -1616,7 +1616,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV_NEXT]] ; AVX2-NEXT: [[TMP42:%.*]] = load double, double* [[ARRAYIDX3_1]], align 8 ; AVX2-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP41]] to double -; AVX2-NEXT: [[ADD_1:%.*]] = fadd double [[TMP42]], [[CONV_1]] +; AVX2-NEXT: [[ADD_1:%.*]] = fadd double [[CONV_1]], [[TMP42]] ; AVX2-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX2-NEXT: store double [[ADD_1]], double* [[ARRAYIDX7_1]], align 8 ; AVX2-NEXT: br label [[FOR_INC_1]] @@ -1630,7 +1630,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV_NEXT_1]] ; AVX2-NEXT: [[TMP44:%.*]] = load double, double* [[ARRAYIDX3_2]], align 8 ; AVX2-NEXT: [[CONV_2:%.*]] = sitofp i32 [[TMP43]] to double -; AVX2-NEXT: [[ADD_2:%.*]] = fadd double [[TMP44]], [[CONV_2]] +; AVX2-NEXT: [[ADD_2:%.*]] = fadd double [[CONV_2]], [[TMP44]] ; AVX2-NEXT: [[ARRAYIDX7_2:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT_1]] ; AVX2-NEXT: store double [[ADD_2]], double* [[ARRAYIDX7_2]], align 8 ; AVX2-NEXT: br label [[FOR_INC_2]] @@ -1644,7 +1644,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV_NEXT_2]] ; AVX2-NEXT: [[TMP46:%.*]] = load double, double* [[ARRAYIDX3_3]], align 8 ; AVX2-NEXT: [[CONV_3:%.*]] = sitofp i32 [[TMP45]] to double -; AVX2-NEXT: [[ADD_3:%.*]] = fadd double [[TMP46]], [[CONV_3]] +; AVX2-NEXT: [[ADD_3:%.*]] = fadd double [[CONV_3]], [[TMP46]] ; AVX2-NEXT: [[ARRAYIDX7_3:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT_2]] ; AVX2-NEXT: store double [[ADD_3]], double* [[ARRAYIDX7_3]], align 8 ; AVX2-NEXT: br label [[FOR_INC_3]] @@ -1662,11 +1662,11 @@ ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[TMP0]], [[A]] ; AVX512-NEXT: [[TMP1:%.*]] = bitcast double* [[SCEVGEP]] to i32* ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX512-NEXT: [[BOUND016:%.*]] = icmp ugt double* [[SCEVGEP14]], [[A]] ; AVX512-NEXT: [[BOUND117:%.*]] = icmp ugt double* [[SCEVGEP]], [[B]] -; AVX512-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 
[[BOUND016]], [[BOUND117]] -; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT18]] +; AVX512-NEXT: [[FOUND_CONFLICT18:%.*]] = and i1 [[BOUND117]], [[BOUND016]] +; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT18]], [[FOUND_CONFLICT]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -1702,10 +1702,10 @@ ; AVX512-NEXT: [[TMP23:%.*]] = sitofp <8 x i32> [[WIDE_LOAD22]] to <8 x double> ; AVX512-NEXT: [[TMP24:%.*]] = sitofp <8 x i32> [[WIDE_LOAD23]] to <8 x double> ; AVX512-NEXT: [[TMP25:%.*]] = sitofp <8 x i32> [[WIDE_LOAD24]] to <8 x double> -; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD]], [[TMP22]] -; AVX512-NEXT: [[TMP27:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD25]], [[TMP23]] -; AVX512-NEXT: [[TMP28:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD26]], [[TMP24]] -; AVX512-NEXT: [[TMP29:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD27]], [[TMP25]] +; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[TMP22]], [[WIDE_MASKED_LOAD]] +; AVX512-NEXT: [[TMP27:%.*]] = fadd <8 x double> [[TMP23]], [[WIDE_MASKED_LOAD25]] +; AVX512-NEXT: [[TMP28:%.*]] = fadd <8 x double> [[TMP24]], [[WIDE_MASKED_LOAD26]] +; AVX512-NEXT: [[TMP29:%.*]] = fadd <8 x double> [[TMP25]], [[WIDE_MASKED_LOAD27]] ; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDEX]] ; AVX512-NEXT: [[TMP31:%.*]] = bitcast double* [[TMP30]] to <8 x double>* ; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[TMP26]], <8 x double>* [[TMP31]], i32 8, <8 x i1> [[TMP10]]), !alias.scope !36, !noalias !38 @@ -1734,7 +1734,7 @@ ; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV]] ; AVX512-NEXT: [[TMP40:%.*]] = load double, double* [[ARRAYIDX3]], align 8 ; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP39]] to double -; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP40]], [[CONV]] +; AVX512-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP40]] ; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] ; AVX512-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 ; AVX512-NEXT: br label [[FOR_INC]] @@ -1750,7 +1750,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV_NEXT]] ; AVX512-NEXT: [[TMP42:%.*]] = load double, double* [[ARRAYIDX3_1]], align 8 ; AVX512-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP41]] to double -; AVX512-NEXT: [[ADD_1:%.*]] = fadd double [[TMP42]], [[CONV_1]] +; AVX512-NEXT: [[ADD_1:%.*]] = fadd double [[CONV_1]], [[TMP42]] ; AVX512-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX512-NEXT: store double [[ADD_1]], double* [[ARRAYIDX7_1]], align 8 ; AVX512-NEXT: br label [[FOR_INC_1]] @@ -1764,7 +1764,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV_NEXT_1]] ; AVX512-NEXT: [[TMP44:%.*]] = load double, double* [[ARRAYIDX3_2]], align 8 ; AVX512-NEXT: [[CONV_2:%.*]] = sitofp i32 [[TMP43]] to double -; AVX512-NEXT: [[ADD_2:%.*]] = fadd double [[TMP44]], [[CONV_2]] +; AVX512-NEXT: [[ADD_2:%.*]] = fadd double [[CONV_2]], [[TMP44]] ; AVX512-NEXT: [[ARRAYIDX7_2:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT_1]] ; AVX512-NEXT: store double [[ADD_2]], double* [[ARRAYIDX7_2]], align 8 ; 
AVX512-NEXT: br label [[FOR_INC_2]] @@ -1778,7 +1778,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDVARS_IV_NEXT_2]] ; AVX512-NEXT: [[TMP46:%.*]] = load double, double* [[ARRAYIDX3_3]], align 8 ; AVX512-NEXT: [[CONV_3:%.*]] = sitofp i32 [[TMP45]] to double -; AVX512-NEXT: [[ADD_3:%.*]] = fadd double [[TMP46]], [[CONV_3]] +; AVX512-NEXT: [[ADD_3:%.*]] = fadd double [[CONV_3]], [[TMP46]] ; AVX512-NEXT: [[ARRAYIDX7_3:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT_2]] ; AVX512-NEXT: store double [[ADD_3]], double* [[ARRAYIDX7_3]], align 8 ; AVX512-NEXT: br label [[FOR_INC_3]] @@ -1872,7 +1872,7 @@ ; AVX1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP1]] ; AVX1-NEXT: [[TMP2:%.*]] = load double, double* [[ARRAYIDX3]], align 8 ; AVX1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double -; AVX1-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], [[CONV]] +; AVX1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP2]] ; AVX1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] ; AVX1-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 ; AVX1-NEXT: br label [[FOR_INC]] @@ -1892,7 +1892,7 @@ ; AVX1-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]] ; AVX1-NEXT: [[TMP5:%.*]] = load double, double* [[ARRAYIDX3_1]], align 8 ; AVX1-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP3]] to double -; AVX1-NEXT: [[ADD_1:%.*]] = fadd double [[TMP5]], [[CONV_1]] +; AVX1-NEXT: [[ADD_1:%.*]] = fadd double [[CONV_1]], [[TMP5]] ; AVX1-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX1-NEXT: store double [[ADD_1]], double* [[ARRAYIDX7_1]], align 8 ; AVX1-NEXT: br label [[FOR_INC_1]] @@ -1914,7 +1914,7 @@ ; AVX2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP1]] ; AVX2-NEXT: [[TMP2:%.*]] = load double, double* [[ARRAYIDX3]], align 8 ; AVX2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double -; AVX2-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], [[CONV]] +; AVX2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP2]] ; AVX2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[INDVARS_IV]] ; AVX2-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 ; AVX2-NEXT: br label [[FOR_INC]] @@ -1934,7 +1934,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]] ; AVX2-NEXT: [[TMP5:%.*]] = load double, double* [[ARRAYIDX3_1]], align 8 ; AVX2-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP3]] to double -; AVX2-NEXT: [[ADD_1:%.*]] = fadd double [[TMP5]], [[CONV_1]] +; AVX2-NEXT: [[ADD_1:%.*]] = fadd double [[CONV_1]], [[TMP5]] ; AVX2-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX2-NEXT: store double [[ADD_1]], double* [[ARRAYIDX7_1]], align 8 ; AVX2-NEXT: br label [[FOR_INC_1]] @@ -1949,7 +1949,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]] ; AVX2-NEXT: [[TMP8:%.*]] = load double, double* [[ARRAYIDX3_2]], align 8 ; AVX2-NEXT: [[CONV_2:%.*]] = sitofp i32 [[TMP6]] to double -; AVX2-NEXT: [[ADD_2:%.*]] = fadd double [[TMP8]], [[CONV_2]] +; AVX2-NEXT: [[ADD_2:%.*]] = fadd double [[CONV_2]], [[TMP8]] ; AVX2-NEXT: [[ARRAYIDX7_2:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT_1]] ; AVX2-NEXT: store double [[ADD_2]], double* [[ARRAYIDX7_2]], align 8 ; AVX2-NEXT: br label 
[[FOR_INC_2]] @@ -1964,7 +1964,7 @@ ; AVX2-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP10]] ; AVX2-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX3_3]], align 8 ; AVX2-NEXT: [[CONV_3:%.*]] = sitofp i32 [[TMP9]] to double -; AVX2-NEXT: [[ADD_3:%.*]] = fadd double [[TMP11]], [[CONV_3]] +; AVX2-NEXT: [[ADD_3:%.*]] = fadd double [[CONV_3]], [[TMP11]] ; AVX2-NEXT: [[ARRAYIDX7_3:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT_2]] ; AVX2-NEXT: store double [[ADD_3]], double* [[ARRAYIDX7_3]], align 8 ; AVX2-NEXT: br label [[FOR_INC_3]] @@ -1975,17 +1975,17 @@ ; AVX512-LABEL: @foo4( ; AVX512-NEXT: entry: ; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[A:%.*]], i64 9985 -; AVX512-NEXT: [[SCEVGEP12:%.*]] = getelementptr i32, i32* [[TRIGGER:%.*]], i64 9985 -; AVX512-NEXT: [[SCEVGEP15:%.*]] = getelementptr double, double* [[B:%.*]], i64 19969 -; AVX512-NEXT: [[TMP0:%.*]] = bitcast i32* [[SCEVGEP12]] to double* +; AVX512-NEXT: [[SCEVGEP13:%.*]] = getelementptr i32, i32* [[TRIGGER:%.*]], i64 9985 +; AVX512-NEXT: [[SCEVGEP16:%.*]] = getelementptr double, double* [[B:%.*]], i64 19969 +; AVX512-NEXT: [[TMP0:%.*]] = bitcast i32* [[SCEVGEP13]] to double* ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[TMP0]], [[A]] ; AVX512-NEXT: [[TMP1:%.*]] = bitcast double* [[SCEVGEP]] to i32* ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; AVX512-NEXT: [[BOUND017:%.*]] = icmp ugt double* [[SCEVGEP15]], [[A]] -; AVX512-NEXT: [[BOUND118:%.*]] = icmp ugt double* [[SCEVGEP]], [[B]] -; AVX512-NEXT: [[FOUND_CONFLICT19:%.*]] = and i1 [[BOUND017]], [[BOUND118]] -; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT19]] +; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] +; AVX512-NEXT: [[BOUND018:%.*]] = icmp ugt double* [[SCEVGEP16]], [[A]] +; AVX512-NEXT: [[BOUND119:%.*]] = icmp ugt double* [[SCEVGEP]], [[B]] +; AVX512-NEXT: [[FOUND_CONFLICT20:%.*]] = and i1 [[BOUND119]], [[BOUND018]] +; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT20]], [[FOUND_CONFLICT]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT_2:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -1995,9 +1995,9 @@ ; AVX512-NEXT: [[TMP3:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], ; AVX512-NEXT: [[TMP4:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], ; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[B]], <8 x i64> [[TMP4]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER20:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP5]], i32 8, <8 x i1> [[TMP3]], <8 x double> undef), !alias.scope !44 +; AVX512-NEXT: [[WIDE_MASKED_GATHER21:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP5]], i32 8, <8 x i1> [[TMP3]], <8 x double> undef), !alias.scope !44 ; AVX512-NEXT: [[TMP6:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER]] to <8 x double> -; AVX512-NEXT: [[TMP7:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER20]], [[TMP6]] +; AVX512-NEXT: [[TMP7:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER21]], [[TMP6]] ; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[A]], <8 x i64> [[VEC_IND]] ; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> [[TMP7]], <8 x double*> [[TMP8]], i32 8, <8 x i1> [[TMP3]]), !alias.scope 
!46, !noalias !48 ; AVX512-NEXT: [[VEC_IND_NEXT:%.*]] = add <8 x i64> [[VEC_IND]], @@ -2006,9 +2006,9 @@ ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER_1]], ; AVX512-NEXT: [[TMP11:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND_NEXT]], ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, double* [[B]], <8 x i64> [[TMP11]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER20_1:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP12]], i32 8, <8 x i1> [[TMP10]], <8 x double> undef), !alias.scope !44 +; AVX512-NEXT: [[WIDE_MASKED_GATHER21_1:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP12]], i32 8, <8 x i1> [[TMP10]], <8 x double> undef), !alias.scope !44 ; AVX512-NEXT: [[TMP13:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER_1]] to <8 x double> -; AVX512-NEXT: [[TMP14:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER20_1]], [[TMP13]] +; AVX512-NEXT: [[TMP14:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER21_1]], [[TMP13]] ; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[A]], <8 x i64> [[VEC_IND_NEXT]] ; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> [[TMP14]], <8 x double*> [[TMP15]], i32 8, <8 x i1> [[TMP10]]), !alias.scope !46, !noalias !48 ; AVX512-NEXT: [[VEC_IND_NEXT_1:%.*]] = add <8 x i64> [[VEC_IND]], @@ -2017,9 +2017,9 @@ ; AVX512-NEXT: [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER_2]], ; AVX512-NEXT: [[TMP18:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND_NEXT_1]], ; AVX512-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[B]], <8 x i64> [[TMP18]] -; AVX512-NEXT: [[WIDE_MASKED_GATHER20_2:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP19]], i32 8, <8 x i1> [[TMP17]], <8 x double> undef), !alias.scope !44 +; AVX512-NEXT: [[WIDE_MASKED_GATHER21_2:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> [[TMP19]], i32 8, <8 x i1> [[TMP17]], <8 x double> undef), !alias.scope !44 ; AVX512-NEXT: [[TMP20:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER_2]] to <8 x double> -; AVX512-NEXT: [[TMP21:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER20_2]], [[TMP20]] +; AVX512-NEXT: [[TMP21:%.*]] = fadd <8 x double> [[WIDE_MASKED_GATHER21_2]], [[TMP20]] ; AVX512-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[A]], <8 x i64> [[VEC_IND_NEXT_1]] ; AVX512-NEXT: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> [[TMP21]], <8 x double*> [[TMP22]], i32 8, <8 x i1> [[TMP17]]), !alias.scope !46, !noalias !48 ; AVX512-NEXT: [[INDEX_NEXT_2]] = add nuw nsw i64 [[INDEX]], 24 @@ -2042,7 +2042,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_PROL:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP26]] ; AVX512-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX3_PROL]], align 8 ; AVX512-NEXT: [[CONV_PROL:%.*]] = sitofp i32 [[TMP25]] to double -; AVX512-NEXT: [[ADD_PROL:%.*]] = fadd double [[TMP27]], [[CONV_PROL]] +; AVX512-NEXT: [[ADD_PROL:%.*]] = fadd double [[CONV_PROL]], [[TMP27]] ; AVX512-NEXT: [[ARRAYIDX7_PROL:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_PROL]] ; AVX512-NEXT: store double [[ADD_PROL]], double* [[ARRAYIDX7_PROL]], align 8 ; AVX512-NEXT: br label [[FOR_INC_PROL]] @@ -2066,7 +2066,7 @@ ; AVX512-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP30]] ; AVX512-NEXT: [[TMP31:%.*]] = load double, double* [[ARRAYIDX3]], align 8 ; AVX512-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP29]] to double -; AVX512-NEXT: [[ADD:%.*]] = fadd double [[TMP31]], [[CONV]] +; 
AVX512-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP31]] ; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV]] ; AVX512-NEXT: store double [[ADD]], double* [[ARRAYIDX7]], align 8 ; AVX512-NEXT: br label [[FOR_INC]] @@ -2083,7 +2083,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP33]] ; AVX512-NEXT: [[TMP34:%.*]] = load double, double* [[ARRAYIDX3_1]], align 8 ; AVX512-NEXT: [[CONV_1:%.*]] = sitofp i32 [[TMP32]] to double -; AVX512-NEXT: [[ADD_1:%.*]] = fadd double [[TMP34]], [[CONV_1]] +; AVX512-NEXT: [[ADD_1:%.*]] = fadd double [[CONV_1]], [[TMP34]] ; AVX512-NEXT: [[ARRAYIDX7_1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT]] ; AVX512-NEXT: store double [[ADD_1]], double* [[ARRAYIDX7_1]], align 8 ; AVX512-NEXT: br label [[FOR_INC_1]] @@ -2098,7 +2098,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP36]] ; AVX512-NEXT: [[TMP37:%.*]] = load double, double* [[ARRAYIDX3_2]], align 8 ; AVX512-NEXT: [[CONV_2:%.*]] = sitofp i32 [[TMP35]] to double -; AVX512-NEXT: [[ADD_2:%.*]] = fadd double [[TMP37]], [[CONV_2]] +; AVX512-NEXT: [[ADD_2:%.*]] = fadd double [[CONV_2]], [[TMP37]] ; AVX512-NEXT: [[ARRAYIDX7_2:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT_1]] ; AVX512-NEXT: store double [[ADD_2]], double* [[ARRAYIDX7_2]], align 8 ; AVX512-NEXT: br label [[FOR_INC_2]] @@ -2113,7 +2113,7 @@ ; AVX512-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP39]] ; AVX512-NEXT: [[TMP40:%.*]] = load double, double* [[ARRAYIDX3_3]], align 8 ; AVX512-NEXT: [[CONV_3:%.*]] = sitofp i32 [[TMP38]] to double -; AVX512-NEXT: [[ADD_3:%.*]] = fadd double [[TMP40]], [[CONV_3]] +; AVX512-NEXT: [[ADD_3:%.*]] = fadd double [[CONV_3]], [[TMP40]] ; AVX512-NEXT: [[ARRAYIDX7_3:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[INDVARS_IV_NEXT_2]] ; AVX512-NEXT: store double [[ADD_3]], double* [[ARRAYIDX7_3]], align 8 ; AVX512-NEXT: br label [[FOR_INC_3]] @@ -2416,11 +2416,11 @@ ; AVX1-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[TMP0]], [[OUT]] ; AVX1-NEXT: [[TMP1:%.*]] = bitcast double* [[SCEVGEP]] to i32* ; AVX1-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX1-NEXT: [[BOUND014:%.*]] = icmp ugt double* [[SCEVGEP12]], [[OUT]] ; AVX1-NEXT: [[BOUND115:%.*]] = icmp ugt double* [[SCEVGEP]], [[IN]] ; AVX1-NEXT: [[FOUND_CONFLICT16:%.*]] = and i1 [[BOUND014]], [[BOUND115]] -; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT16]] +; AVX1-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT16]], [[FOUND_CONFLICT]] ; AVX1-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; AVX1: vector.body: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -2534,11 +2534,11 @@ ; AVX2-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[TMP0]], [[OUT]] ; AVX2-NEXT: [[TMP1:%.*]] = bitcast double* [[SCEVGEP]] to i32* ; AVX2-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX2-NEXT: [[BOUND014:%.*]] = icmp ugt double* [[SCEVGEP12]], [[OUT]] ; AVX2-NEXT: [[BOUND115:%.*]] = icmp ugt double* [[SCEVGEP]], [[IN]] ; AVX2-NEXT: 
[[FOUND_CONFLICT16:%.*]] = and i1 [[BOUND014]], [[BOUND115]] -; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT16]] +; AVX2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT16]], [[FOUND_CONFLICT]] ; AVX2-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; AVX2: vector.body: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] @@ -2678,11 +2678,11 @@ ; AVX512-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[TMP0]], [[OUT]] ; AVX512-NEXT: [[TMP1:%.*]] = bitcast double* [[SCEVGEP]] to i32* ; AVX512-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[TRIGGER]] -; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND1]], [[BOUND0]] ; AVX512-NEXT: [[BOUND014:%.*]] = icmp ugt double* [[SCEVGEP12]], [[OUT]] ; AVX512-NEXT: [[BOUND115:%.*]] = icmp ugt double* [[SCEVGEP]], [[IN]] ; AVX512-NEXT: [[FOUND_CONFLICT16:%.*]] = and i1 [[BOUND014]], [[BOUND115]] -; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT16]] +; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT16]], [[FOUND_CONFLICT]] ; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] Index: test/Transforms/Reassociate/reassociate-after-unroll.ll =================================================================== --- test/Transforms/Reassociate/reassociate-after-unroll.ll +++ test/Transforms/Reassociate/reassociate-after-unroll.ll @@ -0,0 +1,55 @@ +; RUN: opt -O2 -S < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define dso_local i64 @func(i64 %blah, i64 %limit) #0 { +entry: + %blah.addr = alloca i64, align 8 + %limit.addr = alloca i64, align 8 + %k = alloca i32, align 4 + %g = alloca i64, align 8 + %i = alloca i64, align 8 + store i64 %blah, i64* %blah.addr, align 8 + store i64 %limit, i64* %limit.addr, align 8 + store i32 1, i32* %k, align 4 + store i64 0, i64* %i, align 8 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %0 = load i64, i64* %i, align 8 + %1 = load i64, i64* %limit.addr, align 8 + %cmp = icmp ult i64 %0, %1 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %2 = load i64, i64* %g, align 8 + ret i64 %2 + +; CHECK: for.body: +; CHECK: mul i64 %{{.*}}, 8 +for.body: ; preds = %for.cond + %3 = load i64, i64* %blah.addr, align 8 + %4 = load i32, i32* %k, align 4 + %conv = zext i32 %4 to i64 + %and = and i64 %conv, %3 + %conv1 = trunc i64 %and to i32 + store i32 %conv1, i32* %k, align 4 + %5 = load i32, i32* %k, align 4 + %conv2 = zext i32 %5 to i64 + %6 = load i64, i64* %g, align 8 + %add = add i64 %6, %conv2 + store i64 %add, i64* %g, align 8 + %7 = load i64, i64* %i, align 8 + %inc = add i64 %7, 1 + store i64 %inc, i64* %i, align 8 + br label %for.cond +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +attributes #0 = { "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind }
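
Note (appended after the diff, not part of the patch): the new test test/Transforms/Reassociate/reassociate-after-unroll.ll depends on the late ReassociatePass run added above in PassBuilder.cpp and PassManagerBuilder.cpp. Once the -O2 unroller expands the loop body, the repeated additions of the same value into the accumulator %g can only be folded into a multiply if reassociation runs again late in the pipeline, which is what the "mul i64 %{{.*}}, 8" check looks for. The IR below is a hand-written sketch of that folding; the function name, value names, and the factor of 8 (mirroring the test's check) are illustrative assumptions, not actual -O2 output.

; Sketch only: eight unrolled additions of the same value %k into %g,
; the shape the unroller is expected to leave behind.
define i64 @acc_sketch(i64 %g, i64 %k) {
entry:
  %a1 = add i64 %g, %k
  %a2 = add i64 %a1, %k
  %a3 = add i64 %a2, %k
  %a4 = add i64 %a3, %k
  %a5 = add i64 %a4, %k
  %a6 = add i64 %a5, %k
  %a7 = add i64 %a6, %k
  %a8 = add i64 %a7, %k
  ret i64 %a8
}
; Running "opt -reassociate -S" on the sketch collapses the chain into
; roughly the following, matching the pattern the new test checks for:
;   %m = mul i64 %k, 8
;   %r = add i64 %m, %g
;   ret i64 %r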