Index: llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -395,7 +395,7 @@
       return Builder.CreateShl(NegOp0, I->getOperand(1), I->getName() + ".neg");
     // Otherwise, `shl %x, C` can be interpreted as `mul %x, 1<<C`.
     auto *Op1C = dyn_cast<Constant>(I->getOperand(1));
-    if (!Op1C) // Early return.
+    if (!Op1C || !IsTrulyNegation)
       return nullptr;
     return Builder.CreateMul(
         I->getOperand(0),
Index: llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -147,7 +147,7 @@
 ; CHECK-LABEL: @local_size_x_8_16_2_wrong_group_id(
 ; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.y()
-; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8
+; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3
 define amdgpu_kernel void @local_size_x_8_16_2_wrong_group_id(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 {
   %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
   %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4
@@ -169,7 +169,7 @@
 ; CHECK-LABEL: @local_size_x_8_16_2_wrong_grid_size(
 ; CHECK: %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4
 ; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x()
-; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8
+; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3
 define amdgpu_kernel void @local_size_x_8_16_2_wrong_grid_size(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 {
   %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
   %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4
@@ -191,8 +191,8 @@
 ; CHECK-LABEL: @local_size_x_8_16_2_wrong_cmp_type(
 ; CHECK: %grid.size.x = load i32, i32 addrspace(4)* %gep.grid.size.x.bc, align 4
 ; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x()
-; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8
-; CHECK: %sub = add i32 %group.id_x_group.size.x.neg, %grid.size.x
+; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3
+; CHECK: %sub = sub i32 %grid.size.x, %group.id_x_group.size.x
 ; CHECK: %smin = call i32 @llvm.smin.i32(i32 %sub, i32 8)
 define amdgpu_kernel void @local_size_x_8_16_2_wrong_cmp_type(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 {
   %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
@@ -213,8 +213,8 @@
 }
 ; CHECK-LABEL: @local_size_x_8_16_2_wrong_select(
-; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8
-; CHECK: %sub = add i32 %group.id_x_group.size.x.neg, %grid.size.x
+; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3
+; CHECK: %sub = sub i32 %grid.size.x, %group.id_x_group.size.x
 ; CHECK: %umax = call i32 @llvm.umax.i32(i32 %sub, i32 8)
 ; CHECK: %zext = zext i32 %umax to i64
 define amdgpu_kernel void @local_size_x_8_16_2_wrong_select(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 {
@@ -239,8 +239,8 @@
 ; CHECK: %grid.size.x = load i16, i16 addrspace(4)* %gep.grid.size.x.bc, align 4
 ; CHECK: %grid.size.x.zext = zext i16 %grid.size.x to i32
 ; CHECK: %group.id = tail call i32 @llvm.amdgcn.workgroup.id.x()
-; CHECK: %group.id_x_group.size.x.neg = mul i32 %group.id, -8
-; CHECK: %sub = add i32 %group.id_x_group.size.x.neg, %grid.size.x.zext
+; CHECK: %group.id_x_group.size.x = shl i32 %group.id, 3
+; CHECK: %sub = sub i32 %grid.size.x.zext, %group.id_x_group.size.x
 define amdgpu_kernel void @use_local_size_x_8_16_2_wrong_grid_load_size(i64 addrspace(1)* %out) #0 !reqd_work_group_size !0 {
   %dispatch.ptr = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
   %gep.group.size.x = getelementptr inbounds i8, i8 addrspace(4)* %dispatch.ptr, i64 4
Index: llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
===================================================================
--- llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
+++ llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
@@ -1029,8 +1029,8 @@
 define i32 @muladd2(i32 %a0) {
 ; CHECK-LABEL: @muladd2(
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul i32 [[A0:%.*]], -4
-; CHECK-NEXT:    [[MUL:%.*]] = add i32 [[DOTNEG]], -64
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[A0:%.*]], 2
+; CHECK-NEXT:    [[MUL:%.*]] = sub i32 -64, [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[MUL]]
 ;
   %add = add i32 %a0, 16
@@ -1040,8 +1040,8 @@
 define <2 x i32> @muladd2_vec(<2 x i32> %a0) {
 ; CHECK-LABEL: @muladd2_vec(
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul <2 x i32> [[A0:%.*]], <i32 -4, i32 -4>
-; CHECK-NEXT:    [[MUL:%.*]] = add <2 x i32> [[DOTNEG]], <i32 -64, i32 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[MUL:%.*]] = sub <2 x i32> <i32 -64, i32 -64>, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %add = add <2 x i32> %a0, <i32 16, i32 16>
Index: llvm/test/Transforms/InstCombine/mul.ll
===================================================================
--- llvm/test/Transforms/InstCombine/mul.ll
+++ llvm/test/Transforms/InstCombine/mul.ll
@@ -1359,8 +1359,8 @@
 define i32 @muladd2(i32 %a0) {
 ; CHECK-LABEL: @muladd2(
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul i32 [[A0:%.*]], -4
-; CHECK-NEXT:    [[MUL:%.*]] = add i32 [[DOTNEG]], -64
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[A0:%.*]], 2
+; CHECK-NEXT:    [[MUL:%.*]] = sub i32 -64, [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[MUL]]
 ;
   %add = add i32 %a0, 16
@@ -1370,8 +1370,8 @@
 define <2 x i32> @muladd2_vec(<2 x i32> %a0) {
 ; CHECK-LABEL: @muladd2_vec(
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul <2 x i32> [[A0:%.*]], <i32 -4, i32 -4>
-; CHECK-NEXT:    [[MUL:%.*]] = add <2 x i32> [[DOTNEG]], <i32 -64, i32 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[MUL:%.*]] = sub <2 x i32> <i32 -64, i32 -64>, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %add = add <2 x i32> %a0, <i32 16, i32 16>
Index: llvm/test/Transforms/InstCombine/shl-bo.ll
===================================================================
--- llvm/test/Transforms/InstCombine/shl-bo.ll
+++ llvm/test/Transforms/InstCombine/shl-bo.ll
@@ -51,8 +51,8 @@
 define <2 x i8> @lshr_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_sub_commute_splat(
 ; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]],
-; CHECK-NEXT:    [[B1_NEG:%.*]] = mul <2 x i8> [[X]],
-; CHECK-NEXT:    [[R2:%.*]] = add <2 x i8> [[B1_NEG]], [[Y:%.*]]
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]],
+; CHECK-NEXT:    [[R2:%.*]] = sub <2 x i8> [[Y:%.*]], [[B1]]
 ; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[R2]],
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
@@ -233,9 +233,9 @@
 define <2 x i8> @lshr_and_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_and_sub_commute_splat(
 ; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]],
-; CHECK-NEXT:    [[B1_NEG:%.*]] = mul <2 x i8> [[X]],
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]],
 ; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]],
-; CHECK-NEXT:    [[L:%.*]] = add <2 x i8> [[B1_NEG]], [[Y_MASK]]
+; CHECK-NEXT:    [[L:%.*]] = sub <2 x i8> [[Y_MASK]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
   %x = srem <2 x i8> %a, ; thwart complexity-based canonicalization
Index: llvm/test/Transforms/InstCombine/shl-sub.ll
===================================================================
--- llvm/test/Transforms/InstCombine/shl-sub.ll
+++ llvm/test/Transforms/InstCombine/shl-sub.ll
@@ -180,8 +180,8 @@
 define i32 @shl_const_op1_sub_const_op0(i32 %x) {
 ; CHECK-LABEL: @shl_const_op1_sub_const_op0(
-; CHECK-NEXT:    [[S_NEG:%.*]] = mul i32 [[X:%.*]], -8
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[S_NEG]], 336
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[R:%.*]] = sub i32 336, [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %s = sub i32 42, %x
@@ -191,8 +191,8 @@
 define <2 x i32> @shl_const_op1_sub_const_op0_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @shl_const_op1_sub_const_op0_splat(
-; CHECK-NEXT:    [[S_NEG:%.*]] = mul <2 x i32> [[X:%.*]], <i32 -8, i32 -8>
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[S_NEG]], <i32 336, i32 336>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i32> <i32 336, i32 336>, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %s = sub <2 x i32> <i32 42, i32 42>, %x
Index: llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll
+++ llvm/test/Transforms/InstCombine/sub-of-negatible-inseltpoison.ll
@@ -1217,8 +1217,8 @@
 ; CHECK-LABEL: @negate_left_shift_by_constant(
 ; CHECK-NEXT:    [[T0:%.*]] = sub i8 [[K:%.*]], [[Z:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[T0]])
-; CHECK-NEXT:    [[T1_NEG:%.*]] = mul i8 [[T0]], -16
-; CHECK-NEXT:    [[T2:%.*]] = add i8 [[T1_NEG]], [[X:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = shl i8 [[T0]], 4
+; CHECK-NEXT:    [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    ret i8 [[T2]]
 ;
   %t0 = sub i8 %k, %z
@@ -1329,7 +1329,7 @@
 ; CHECK-LABEL: @negate_select_of_op_vs_negated_op(
 ; CHECK-NEXT:    [[T0:%.*]] = sub i8 0, [[X:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[T0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C:%.*]], i8 [[X]], i8 [[T0]], !prof !0
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C:%.*]], i8 [[X]], i8 [[T0]], !prof [[PROF0:![0-9]+]]
 ; CHECK-NEXT:    [[T2:%.*]] = add i8 [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret i8 [[T2]]
 ;
Index: llvm/test/Transforms/InstCombine/sub-of-negatible.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sub-of-negatible.ll
+++ llvm/test/Transforms/InstCombine/sub-of-negatible.ll
@@ -1241,8 +1241,8 @@
 ; CHECK-LABEL: @negate_left_shift_by_constant(
 ; CHECK-NEXT:    [[T0:%.*]] = sub i8 [[K:%.*]], [[Z:%.*]]
 ; CHECK-NEXT:    call void @use8(i8 [[T0]])
-; CHECK-NEXT:    [[T1_NEG:%.*]] = mul i8 [[T0]], -16
-; CHECK-NEXT:    [[T2:%.*]] = add i8 [[T1_NEG]], [[X:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = shl i8 [[T0]], 4
+; CHECK-NEXT:    [[T2:%.*]] = sub i8 [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    ret i8 [[T2]]
 ;
   %t0 = sub i8 %k, %z
Index: llvm/test/Transforms/InstCombine/sub.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sub.ll
+++ llvm/test/Transforms/InstCombine/sub.ll
@@ -1182,8 +1182,8 @@
 define i32 @test62(i32 %A) {
 ; CHECK-LABEL: @test62(
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul i32 [[A:%.*]], -2
-; CHECK-NEXT:    [[C:%.*]] = add i32 [[DOTNEG]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT:    [[C:%.*]] = sub i32 2, [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %B = sub i32 1, %A
@@ -1193,8 +1193,8 @@
 define <2 x i32> @test62vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test62vec(
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul <2 x i32> [[A:%.*]], <i32 -2, i32 -2>
-; CHECK-NEXT:    [[C:%.*]] = add <2 x i32> [[DOTNEG]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[C:%.*]] = sub <2 x i32> <i32 2, i32 2>, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = sub <2 x i32> <i32 1, i32 1>, %A
@@ -1204,8 +1204,8 @@
 define i32 @test63(i32 %A) {
 ; CHECK-LABEL: @test63(
-; CHECK-NEXT:    [[DOTNEG_NEG:%.*]] = shl i32 [[A:%.*]], 1
-; CHECK-NEXT:    ret i32 [[DOTNEG_NEG]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[A:%.*]], 1
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %B = sub i32 1, %A
   %C = shl i32 %B, 1
 }
@@ -1215,8 +1215,8 @@
 define <2 x i32> @test63vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test63vec(
-; CHECK-NEXT:    [[DOTNEG_NEG:%.*]] = shl <2 x i32> [[A:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    ret <2 x i32> [[DOTNEG_NEG]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %B = sub <2 x i32> <i32 1, i32 1>, %A
   %C = shl <2 x i32> %B, <i32 1, i32 1>
Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -30,26 +30,26 @@
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP4]], [[N]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul i32 [[TMP7]], -8
-; CHECK-NEXT:    [[TMP8:%.*]] = or i32 [[DOTNEG]], 1
-; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[TMP10]] to <vscale x 8 x double>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, <vscale x 8 x double>* [[TMP11]], align 8
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP13:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[DOTNEG2:%.*]] = mul i32 [[TMP14]], -8
-; CHECK-NEXT:    [[TMP15:%.*]] = or i32 [[DOTNEG2]], 1
-; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = bitcast double* [[TMP17]] to <vscale x 8 x double>*
-; CHECK-NEXT:    store <vscale x 8 x double> [[TMP13]], <vscale x 8 x double>* [[TMP18]], align 8
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP20:%.*]] = shl i64 [[TMP19]], 3
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
-; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shl i32 [[TMP7]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 1, [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <vscale x 8 x double>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, <vscale x 8 x double>* [[TMP12]], align 8
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP14:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i32 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP16:%.*]] = shl i32 [[TMP15]], 3
+; CHECK-NEXT:    [[TMP17:%.*]] = sub i32 1, [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = bitcast double* [[TMP19]] to <vscale x 8 x double>*
+; CHECK-NEXT:    store <vscale x 8 x double> [[TMP14]], <vscale x 8 x double>* [[TMP20]], align 8
+; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP22:%.*]] = shl i64 [[TMP21]], 3
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -64,8 +64,8 @@
 ; CHECK-NEXT:    [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_08]] = add nsw i64 [[I_08_IN]], -1
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP22]], 1.000000e+00
+; CHECK-NEXT:    [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP24]], 1.000000e+00
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 [[I_08]]
 ; CHECK-NEXT:    store double [[ADD]], double* [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
@@ -124,26 +124,26 @@
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], [[N]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[DOTNEG:%.*]] = mul i32 [[TMP13]], -8
-; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[DOTNEG]], 1
-; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* [[TMP17]], align 8
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP20:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT:    [[DOTNEG4:%.*]] = mul i32 [[TMP20]], -8
-; CHECK-NEXT:    [[TMP21:%.*]] = or i32 [[DOTNEG4]], 1
-; CHECK-NEXT:    [[TMP22:%.*]] = sext i32 [[TMP21]] to i64
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, i64* [[TMP18]], i64 [[TMP22]]
-; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i64* [[TMP23]] to <vscale x 8 x i64>*
-; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP19]], <vscale x 8 x i64>* [[TMP24]], align 8
-; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP26:%.*]] = shl i64 [[TMP25]], 3
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP26]]
-; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP14:%.*]] = shl i32 [[TMP13]], 3
+; CHECK-NEXT:    [[TMP15:%.*]] = sub i32 1, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, i64* [[TMP12]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i64* [[TMP17]] to <vscale x 8 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, <vscale x 8 x i64>* [[TMP18]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i32 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP22:%.*]] = shl i32 [[TMP21]], 3
+; CHECK-NEXT:    [[TMP23:%.*]] = sub i32 1, [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, i64* [[TMP19]], i64 [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = bitcast i64* [[TMP25]] to <vscale x 8 x i64>*
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], <vscale x 8 x i64>* [[TMP26]], align 8
+; CHECK-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP28:%.*]] = shl i64 [[TMP27]], 3
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP28]]
+; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -158,8 +158,8 @@
 ; CHECK-NEXT:    [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_09]] = add nsw i64 [[I_09_IN]], -1
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 [[I_09]]
-; CHECK-NEXT:    [[TMP28:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP28]], 1
+; CHECK-NEXT:    [[TMP30:%.*]] = load i64, i64* [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP30]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I_09]]
 ; CHECK-NEXT:    store i64 [[ADD]], i64* [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1
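
The functional change is the added `!IsTrulyNegation` bail-out in Negator's `shl` handling. A minimal hand-written IR sketch of its effect (the @example function below is illustrative, not taken from the test suite): when the negation is only speculative, i.e. the negated value would be folded into a surrounding `sub`, InstCombine now keeps the shift and an explicit `sub` instead of rewriting `shl %x, C` into `mul %x, -(1 << C)` followed by an `add`:

  define i32 @example(i32 %x, i32 %y) {
    ; After this patch: the cheap shift survives and the subtraction stays explicit.
    %shl = shl i32 %x, 3
    %r = sub i32 %y, %shl
    ret i32 %r
  }

Before the patch, Negator would canonicalize the same input to `%neg = mul i32 %x, -8` plus `%r = add i32 %neg, %y`, trading a shift for a multiply; the test updates above all follow this shl+sub pattern. A true negation such as `sub i32 0, %shl` should still be rewritten to `mul i32 %x, -8`, since there `IsTrulyNegation` holds.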