Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3990,7 +3990,12 @@ }; InstructionsState S = getSameOpcode(VL); - if (Depth == RecursionMaxDepth) { + + // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of + // a load), in which case peek through to include it in the tree, without + // ballooning over-budget. + if (Depth >= RecursionMaxDepth && + !(S.MainOp && match(S.MainOp, m_ZExtOrSExtOrSelf(m_Load(m_Value()))))) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); if (TryToFindDuplicates(S)) newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx, Index: llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll +++ llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll @@ -1241,421 +1241,104 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[ST1:%.*]] to i64 ; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[ST2:%.*]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[P1:%.*]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[P2:%.*]], align 1 -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i32 -; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV2]] -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 4 -; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX3]], align 1 -; CHECK-NEXT: [[CONV4:%.*]] = zext i8 [[TMP2]] to i32 -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 4 -; CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[ARRAYIDX5]], align 1 -; CHECK-NEXT: [[CONV6:%.*]] = zext i8 [[TMP3]] to i32 -; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i32 [[CONV4]], [[CONV6]] -; CHECK-NEXT: [[SHL:%.*]] = shl 
nsw i32 [[SUB7]], 16 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[SHL]], [[SUB]] -; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX8]], align 1 -; CHECK-NEXT: [[CONV9:%.*]] = zext i8 [[TMP4]] to i32 -; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[ARRAYIDX10]], align 1 -; CHECK-NEXT: [[CONV11:%.*]] = zext i8 [[TMP5]] to i32 -; CHECK-NEXT: [[SUB12:%.*]] = sub nsw i32 [[CONV9]], [[CONV11]] -; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 5 -; CHECK-NEXT: [[TMP6:%.*]] = load i8, i8* [[ARRAYIDX13]], align 1 -; CHECK-NEXT: [[CONV14:%.*]] = zext i8 [[TMP6]] to i32 -; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 5 -; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[ARRAYIDX15]], align 1 -; CHECK-NEXT: [[CONV16:%.*]] = zext i8 [[TMP7]] to i32 -; CHECK-NEXT: [[SUB17:%.*]] = sub nsw i32 [[CONV14]], [[CONV16]] -; CHECK-NEXT: [[SHL18:%.*]] = shl nsw i32 [[SUB17]], 16 -; CHECK-NEXT: [[ADD19:%.*]] = add nsw i32 [[SHL18]], [[SUB12]] -; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 2 -; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX20]], align 1 -; CHECK-NEXT: [[CONV21:%.*]] = zext i8 [[TMP8]] to i32 -; CHECK-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 2 -; CHECK-NEXT: [[TMP9:%.*]] = load i8, i8* [[ARRAYIDX22]], align 1 -; CHECK-NEXT: [[CONV23:%.*]] = zext i8 [[TMP9]] to i32 -; CHECK-NEXT: [[SUB24:%.*]] = sub nsw i32 [[CONV21]], [[CONV23]] -; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 6 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, i8* [[ARRAYIDX25]], align 1 -; CHECK-NEXT: [[CONV26:%.*]] = zext i8 [[TMP10]] to i32 -; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 6 -; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX27]], align 1 -; CHECK-NEXT: 
[[CONV28:%.*]] = zext i8 [[TMP11]] to i32 -; CHECK-NEXT: [[SUB29:%.*]] = sub nsw i32 [[CONV26]], [[CONV28]] -; CHECK-NEXT: [[SHL30:%.*]] = shl nsw i32 [[SUB29]], 16 -; CHECK-NEXT: [[ADD31:%.*]] = add nsw i32 [[SHL30]], [[SUB24]] -; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 3 -; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX32]], align 1 -; CHECK-NEXT: [[CONV33:%.*]] = zext i8 [[TMP12]] to i32 -; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 3 -; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX34]], align 1 -; CHECK-NEXT: [[CONV35:%.*]] = zext i8 [[TMP13]] to i32 -; CHECK-NEXT: [[SUB36:%.*]] = sub nsw i32 [[CONV33]], [[CONV35]] -; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 7 -; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[ARRAYIDX37]], align 1 -; CHECK-NEXT: [[CONV38:%.*]] = zext i8 [[TMP14]] to i32 -; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 7 -; CHECK-NEXT: [[TMP15:%.*]] = load i8, i8* [[ARRAYIDX39]], align 1 -; CHECK-NEXT: [[CONV40:%.*]] = zext i8 [[TMP15]] to i32 -; CHECK-NEXT: [[SUB41:%.*]] = sub nsw i32 [[CONV38]], [[CONV40]] -; CHECK-NEXT: [[SHL42:%.*]] = shl nsw i32 [[SUB41]], 16 -; CHECK-NEXT: [[ADD43:%.*]] = add nsw i32 [[SHL42]], [[SUB36]] -; CHECK-NEXT: [[ADD44:%.*]] = add nsw i32 [[ADD19]], [[ADD]] -; CHECK-NEXT: [[SUB45:%.*]] = sub nsw i32 [[ADD]], [[ADD19]] -; CHECK-NEXT: [[ADD46:%.*]] = add nsw i32 [[ADD43]], [[ADD31]] -; CHECK-NEXT: [[SUB47:%.*]] = sub nsw i32 [[ADD31]], [[ADD43]] -; CHECK-NEXT: [[ADD48:%.*]] = add nsw i32 [[ADD46]], [[ADD44]] -; CHECK-NEXT: [[SUB51:%.*]] = sub nsw i32 [[ADD44]], [[ADD46]] -; CHECK-NEXT: [[ADD55:%.*]] = add nsw i32 [[SUB47]], [[SUB45]] -; CHECK-NEXT: [[SUB59:%.*]] = sub nsw i32 [[SUB45]], [[SUB47]] +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4 +; CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>* +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[P2]] to <4 x i8>* +; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>* +; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>* +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP16:%.*]] = load i8, i8* [[ADD_PTR]], align 1 -; CHECK-NEXT: [[CONV_1:%.*]] = zext i8 [[TMP16]] to i32 -; CHECK-NEXT: [[TMP17:%.*]] = load i8, i8* [[ADD_PTR64]], align 1 -; CHECK-NEXT: [[CONV2_1:%.*]] = zext i8 [[TMP17]] to i32 -; CHECK-NEXT: [[SUB_1:%.*]] = sub nsw i32 [[CONV_1]], [[CONV2_1]] ; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4 -; CHECK-NEXT: [[TMP18:%.*]] = load i8, i8* [[ARRAYIDX3_1]], align 1 -; CHECK-NEXT: [[CONV4_1:%.*]] = zext i8 [[TMP18]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4 -; CHECK-NEXT: [[TMP19:%.*]] = load i8, i8* [[ARRAYIDX5_1]], align 1 -; CHECK-NEXT: [[CONV6_1:%.*]] = zext i8 [[TMP19]] to i32 -; CHECK-NEXT: [[SUB7_1:%.*]] = sub nsw i32 [[CONV4_1]], [[CONV6_1]] -; CHECK-NEXT: [[SHL_1:%.*]] = shl nsw i32 [[SUB7_1]], 16 -; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[SHL_1]], [[SUB_1]] -; CHECK-NEXT: [[ARRAYIDX8_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 1 -; CHECK-NEXT: [[TMP20:%.*]] = load i8, i8* [[ARRAYIDX8_1]], align 1 -; CHECK-NEXT: [[CONV9_1:%.*]] = zext i8 [[TMP20]] to i32 -; CHECK-NEXT: [[ARRAYIDX10_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 1 -; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* 
[[ARRAYIDX10_1]], align 1 -; CHECK-NEXT: [[CONV11_1:%.*]] = zext i8 [[TMP21]] to i32 -; CHECK-NEXT: [[SUB12_1:%.*]] = sub nsw i32 [[CONV9_1]], [[CONV11_1]] -; CHECK-NEXT: [[ARRAYIDX13_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 5 -; CHECK-NEXT: [[TMP22:%.*]] = load i8, i8* [[ARRAYIDX13_1]], align 1 -; CHECK-NEXT: [[CONV14_1:%.*]] = zext i8 [[TMP22]] to i32 -; CHECK-NEXT: [[ARRAYIDX15_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 5 -; CHECK-NEXT: [[TMP23:%.*]] = load i8, i8* [[ARRAYIDX15_1]], align 1 -; CHECK-NEXT: [[CONV16_1:%.*]] = zext i8 [[TMP23]] to i32 -; CHECK-NEXT: [[SUB17_1:%.*]] = sub nsw i32 [[CONV14_1]], [[CONV16_1]] -; CHECK-NEXT: [[SHL18_1:%.*]] = shl nsw i32 [[SUB17_1]], 16 -; CHECK-NEXT: [[ADD19_1:%.*]] = add nsw i32 [[SHL18_1]], [[SUB12_1]] -; CHECK-NEXT: [[ARRAYIDX20_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 2 -; CHECK-NEXT: [[TMP24:%.*]] = load i8, i8* [[ARRAYIDX20_1]], align 1 -; CHECK-NEXT: [[CONV21_1:%.*]] = zext i8 [[TMP24]] to i32 -; CHECK-NEXT: [[ARRAYIDX22_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 2 -; CHECK-NEXT: [[TMP25:%.*]] = load i8, i8* [[ARRAYIDX22_1]], align 1 -; CHECK-NEXT: [[CONV23_1:%.*]] = zext i8 [[TMP25]] to i32 -; CHECK-NEXT: [[SUB24_1:%.*]] = sub nsw i32 [[CONV21_1]], [[CONV23_1]] -; CHECK-NEXT: [[ARRAYIDX25_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 6 -; CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[ARRAYIDX25_1]], align 1 -; CHECK-NEXT: [[CONV26_1:%.*]] = zext i8 [[TMP26]] to i32 -; CHECK-NEXT: [[ARRAYIDX27_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 6 -; CHECK-NEXT: [[TMP27:%.*]] = load i8, i8* [[ARRAYIDX27_1]], align 1 -; CHECK-NEXT: [[CONV28_1:%.*]] = zext i8 [[TMP27]] to i32 -; CHECK-NEXT: [[SUB29_1:%.*]] = sub nsw i32 [[CONV26_1]], [[CONV28_1]] -; CHECK-NEXT: [[SHL30_1:%.*]] = shl nsw i32 [[SUB29_1]], 16 -; CHECK-NEXT: [[ADD31_1:%.*]] = add nsw i32 [[SHL30_1]], [[SUB24_1]] -; CHECK-NEXT: [[ARRAYIDX32_1:%.*]] = getelementptr 
inbounds i8, i8* [[ADD_PTR]], i64 3 -; CHECK-NEXT: [[TMP28:%.*]] = load i8, i8* [[ARRAYIDX32_1]], align 1 -; CHECK-NEXT: [[CONV33_1:%.*]] = zext i8 [[TMP28]] to i32 -; CHECK-NEXT: [[ARRAYIDX34_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 3 -; CHECK-NEXT: [[TMP29:%.*]] = load i8, i8* [[ARRAYIDX34_1]], align 1 -; CHECK-NEXT: [[CONV35_1:%.*]] = zext i8 [[TMP29]] to i32 -; CHECK-NEXT: [[SUB36_1:%.*]] = sub nsw i32 [[CONV33_1]], [[CONV35_1]] -; CHECK-NEXT: [[ARRAYIDX37_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 7 -; CHECK-NEXT: [[TMP30:%.*]] = load i8, i8* [[ARRAYIDX37_1]], align 1 -; CHECK-NEXT: [[CONV38_1:%.*]] = zext i8 [[TMP30]] to i32 -; CHECK-NEXT: [[ARRAYIDX39_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 7 -; CHECK-NEXT: [[TMP31:%.*]] = load i8, i8* [[ARRAYIDX39_1]], align 1 -; CHECK-NEXT: [[CONV40_1:%.*]] = zext i8 [[TMP31]] to i32 -; CHECK-NEXT: [[SUB41_1:%.*]] = sub nsw i32 [[CONV38_1]], [[CONV40_1]] -; CHECK-NEXT: [[SHL42_1:%.*]] = shl nsw i32 [[SUB41_1]], 16 -; CHECK-NEXT: [[ADD43_1:%.*]] = add nsw i32 [[SHL42_1]], [[SUB36_1]] -; CHECK-NEXT: [[ADD44_1:%.*]] = add nsw i32 [[ADD19_1]], [[ADD_1]] -; CHECK-NEXT: [[SUB45_1:%.*]] = sub nsw i32 [[ADD_1]], [[ADD19_1]] -; CHECK-NEXT: [[ADD46_1:%.*]] = add nsw i32 [[ADD43_1]], [[ADD31_1]] -; CHECK-NEXT: [[SUB47_1:%.*]] = sub nsw i32 [[ADD31_1]], [[ADD43_1]] -; CHECK-NEXT: [[ADD48_1:%.*]] = add nsw i32 [[ADD46_1]], [[ADD44_1]] -; CHECK-NEXT: [[SUB51_1:%.*]] = sub nsw i32 [[ADD44_1]], [[ADD46_1]] -; CHECK-NEXT: [[ADD55_1:%.*]] = add nsw i32 [[SUB47_1]], [[SUB45_1]] -; CHECK-NEXT: [[SUB59_1:%.*]] = sub nsw i32 [[SUB45_1]], [[SUB47_1]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>* +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>* +; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to 
<4 x i8>* +; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, <4 x i8>* [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>* +; CHECK-NEXT: [[TMP15:%.*]] = load <4 x i8>, <4 x i8>* [[TMP14]], align 1 ; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP32:%.*]] = load i8, i8* [[ADD_PTR_1]], align 1 -; CHECK-NEXT: [[CONV_2:%.*]] = zext i8 [[TMP32]] to i32 -; CHECK-NEXT: [[TMP33:%.*]] = load i8, i8* [[ADD_PTR64_1]], align 1 -; CHECK-NEXT: [[CONV2_2:%.*]] = zext i8 [[TMP33]] to i32 -; CHECK-NEXT: [[SUB_2:%.*]] = sub nsw i32 [[CONV_2]], [[CONV2_2]] ; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 4 -; CHECK-NEXT: [[TMP34:%.*]] = load i8, i8* [[ARRAYIDX3_2]], align 1 -; CHECK-NEXT: [[CONV4_2:%.*]] = zext i8 [[TMP34]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 4 -; CHECK-NEXT: [[TMP35:%.*]] = load i8, i8* [[ARRAYIDX5_2]], align 1 -; CHECK-NEXT: [[CONV6_2:%.*]] = zext i8 [[TMP35]] to i32 -; CHECK-NEXT: [[SUB7_2:%.*]] = sub nsw i32 [[CONV4_2]], [[CONV6_2]] -; CHECK-NEXT: [[SHL_2:%.*]] = shl nsw i32 [[SUB7_2]], 16 -; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[SHL_2]], [[SUB_2]] -; CHECK-NEXT: [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 1 -; CHECK-NEXT: [[TMP36:%.*]] = load i8, i8* [[ARRAYIDX8_2]], align 1 -; CHECK-NEXT: [[CONV9_2:%.*]] = zext i8 [[TMP36]] to i32 -; CHECK-NEXT: [[ARRAYIDX10_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 1 -; CHECK-NEXT: [[TMP37:%.*]] = load i8, i8* [[ARRAYIDX10_2]], align 1 -; CHECK-NEXT: [[CONV11_2:%.*]] = zext i8 [[TMP37]] to i32 -; CHECK-NEXT: [[SUB12_2:%.*]] = sub nsw i32 [[CONV9_2]], [[CONV11_2]] -; CHECK-NEXT: [[ARRAYIDX13_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 5 -; CHECK-NEXT: [[TMP38:%.*]] = 
load i8, i8* [[ARRAYIDX13_2]], align 1 -; CHECK-NEXT: [[CONV14_2:%.*]] = zext i8 [[TMP38]] to i32 -; CHECK-NEXT: [[ARRAYIDX15_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 5 -; CHECK-NEXT: [[TMP39:%.*]] = load i8, i8* [[ARRAYIDX15_2]], align 1 -; CHECK-NEXT: [[CONV16_2:%.*]] = zext i8 [[TMP39]] to i32 -; CHECK-NEXT: [[SUB17_2:%.*]] = sub nsw i32 [[CONV14_2]], [[CONV16_2]] -; CHECK-NEXT: [[SHL18_2:%.*]] = shl nsw i32 [[SUB17_2]], 16 -; CHECK-NEXT: [[ADD19_2:%.*]] = add nsw i32 [[SHL18_2]], [[SUB12_2]] -; CHECK-NEXT: [[ARRAYIDX20_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 2 -; CHECK-NEXT: [[TMP40:%.*]] = load i8, i8* [[ARRAYIDX20_2]], align 1 -; CHECK-NEXT: [[CONV21_2:%.*]] = zext i8 [[TMP40]] to i32 -; CHECK-NEXT: [[ARRAYIDX22_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 2 -; CHECK-NEXT: [[TMP41:%.*]] = load i8, i8* [[ARRAYIDX22_2]], align 1 -; CHECK-NEXT: [[CONV23_2:%.*]] = zext i8 [[TMP41]] to i32 -; CHECK-NEXT: [[SUB24_2:%.*]] = sub nsw i32 [[CONV21_2]], [[CONV23_2]] -; CHECK-NEXT: [[ARRAYIDX25_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 6 -; CHECK-NEXT: [[TMP42:%.*]] = load i8, i8* [[ARRAYIDX25_2]], align 1 -; CHECK-NEXT: [[CONV26_2:%.*]] = zext i8 [[TMP42]] to i32 -; CHECK-NEXT: [[ARRAYIDX27_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 6 -; CHECK-NEXT: [[TMP43:%.*]] = load i8, i8* [[ARRAYIDX27_2]], align 1 -; CHECK-NEXT: [[CONV28_2:%.*]] = zext i8 [[TMP43]] to i32 -; CHECK-NEXT: [[SUB29_2:%.*]] = sub nsw i32 [[CONV26_2]], [[CONV28_2]] -; CHECK-NEXT: [[SHL30_2:%.*]] = shl nsw i32 [[SUB29_2]], 16 -; CHECK-NEXT: [[ADD31_2:%.*]] = add nsw i32 [[SHL30_2]], [[SUB24_2]] -; CHECK-NEXT: [[ARRAYIDX32_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 3 -; CHECK-NEXT: [[TMP44:%.*]] = load i8, i8* [[ARRAYIDX32_2]], align 1 -; CHECK-NEXT: [[CONV33_2:%.*]] = zext i8 [[TMP44]] to i32 -; CHECK-NEXT: [[ARRAYIDX34_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 3 -; 
CHECK-NEXT: [[TMP45:%.*]] = load i8, i8* [[ARRAYIDX34_2]], align 1 -; CHECK-NEXT: [[CONV35_2:%.*]] = zext i8 [[TMP45]] to i32 -; CHECK-NEXT: [[SUB36_2:%.*]] = sub nsw i32 [[CONV33_2]], [[CONV35_2]] -; CHECK-NEXT: [[ARRAYIDX37_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 7 -; CHECK-NEXT: [[TMP46:%.*]] = load i8, i8* [[ARRAYIDX37_2]], align 1 -; CHECK-NEXT: [[CONV38_2:%.*]] = zext i8 [[TMP46]] to i32 -; CHECK-NEXT: [[ARRAYIDX39_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 7 -; CHECK-NEXT: [[TMP47:%.*]] = load i8, i8* [[ARRAYIDX39_2]], align 1 -; CHECK-NEXT: [[CONV40_2:%.*]] = zext i8 [[TMP47]] to i32 -; CHECK-NEXT: [[SUB41_2:%.*]] = sub nsw i32 [[CONV38_2]], [[CONV40_2]] -; CHECK-NEXT: [[SHL42_2:%.*]] = shl nsw i32 [[SUB41_2]], 16 -; CHECK-NEXT: [[ADD43_2:%.*]] = add nsw i32 [[SHL42_2]], [[SUB36_2]] -; CHECK-NEXT: [[ADD44_2:%.*]] = add nsw i32 [[ADD19_2]], [[ADD_2]] -; CHECK-NEXT: [[SUB45_2:%.*]] = sub nsw i32 [[ADD_2]], [[ADD19_2]] -; CHECK-NEXT: [[ADD46_2:%.*]] = add nsw i32 [[ADD43_2]], [[ADD31_2]] -; CHECK-NEXT: [[SUB47_2:%.*]] = sub nsw i32 [[ADD31_2]], [[ADD43_2]] -; CHECK-NEXT: [[ADD48_2:%.*]] = add nsw i32 [[ADD46_2]], [[ADD44_2]] -; CHECK-NEXT: [[SUB51_2:%.*]] = sub nsw i32 [[ADD44_2]], [[ADD46_2]] -; CHECK-NEXT: [[ADD55_2:%.*]] = add nsw i32 [[SUB47_2]], [[SUB45_2]] -; CHECK-NEXT: [[SUB59_2:%.*]] = sub nsw i32 [[SUB45_2]], [[SUB47_2]] +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[ADD_PTR_1]] to <4 x i8>* +; CHECK-NEXT: [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[ADD_PTR64_1]] to <4 x i8>* +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX3_2]] to <4 x i8>* +; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8* [[ARRAYIDX5_2]] to <4 x i8>* +; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1 ; CHECK-NEXT: 
[[ADD_PTR_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 [[IDX_EXT63]] -; CHECK-NEXT: [[TMP48:%.*]] = load i8, i8* [[ADD_PTR_2]], align 1 -; CHECK-NEXT: [[CONV_3:%.*]] = zext i8 [[TMP48]] to i32 -; CHECK-NEXT: [[TMP49:%.*]] = load i8, i8* [[ADD_PTR64_2]], align 1 -; CHECK-NEXT: [[CONV2_3:%.*]] = zext i8 [[TMP49]] to i32 -; CHECK-NEXT: [[SUB_3:%.*]] = sub nsw i32 [[CONV_3]], [[CONV2_3]] ; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 4 -; CHECK-NEXT: [[TMP50:%.*]] = load i8, i8* [[ARRAYIDX3_3]], align 1 -; CHECK-NEXT: [[CONV4_3:%.*]] = zext i8 [[TMP50]] to i32 ; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 4 -; CHECK-NEXT: [[TMP51:%.*]] = load i8, i8* [[ARRAYIDX5_3]], align 1 -; CHECK-NEXT: [[CONV6_3:%.*]] = zext i8 [[TMP51]] to i32 -; CHECK-NEXT: [[SUB7_3:%.*]] = sub nsw i32 [[CONV4_3]], [[CONV6_3]] -; CHECK-NEXT: [[SHL_3:%.*]] = shl nsw i32 [[SUB7_3]], 16 -; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[SHL_3]], [[SUB_3]] -; CHECK-NEXT: [[ARRAYIDX8_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 1 -; CHECK-NEXT: [[TMP52:%.*]] = load i8, i8* [[ARRAYIDX8_3]], align 1 -; CHECK-NEXT: [[CONV9_3:%.*]] = zext i8 [[TMP52]] to i32 -; CHECK-NEXT: [[ARRAYIDX10_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 1 -; CHECK-NEXT: [[TMP53:%.*]] = load i8, i8* [[ARRAYIDX10_3]], align 1 -; CHECK-NEXT: [[CONV11_3:%.*]] = zext i8 [[TMP53]] to i32 -; CHECK-NEXT: [[SUB12_3:%.*]] = sub nsw i32 [[CONV9_3]], [[CONV11_3]] -; CHECK-NEXT: [[ARRAYIDX13_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 5 -; CHECK-NEXT: [[TMP54:%.*]] = load i8, i8* [[ARRAYIDX13_3]], align 1 -; CHECK-NEXT: [[CONV14_3:%.*]] = zext i8 [[TMP54]] to i32 -; CHECK-NEXT: [[ARRAYIDX15_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 5 -; CHECK-NEXT: [[TMP55:%.*]] = load i8, i8* 
[[ARRAYIDX15_3]], align 1 -; CHECK-NEXT: [[CONV16_3:%.*]] = zext i8 [[TMP55]] to i32 -; CHECK-NEXT: [[SUB17_3:%.*]] = sub nsw i32 [[CONV14_3]], [[CONV16_3]] -; CHECK-NEXT: [[SHL18_3:%.*]] = shl nsw i32 [[SUB17_3]], 16 -; CHECK-NEXT: [[ADD19_3:%.*]] = add nsw i32 [[SHL18_3]], [[SUB12_3]] -; CHECK-NEXT: [[ARRAYIDX20_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 2 -; CHECK-NEXT: [[TMP56:%.*]] = load i8, i8* [[ARRAYIDX20_3]], align 1 -; CHECK-NEXT: [[CONV21_3:%.*]] = zext i8 [[TMP56]] to i32 -; CHECK-NEXT: [[ARRAYIDX22_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 2 -; CHECK-NEXT: [[TMP57:%.*]] = load i8, i8* [[ARRAYIDX22_3]], align 1 -; CHECK-NEXT: [[CONV23_3:%.*]] = zext i8 [[TMP57]] to i32 -; CHECK-NEXT: [[SUB24_3:%.*]] = sub nsw i32 [[CONV21_3]], [[CONV23_3]] -; CHECK-NEXT: [[ARRAYIDX25_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 6 -; CHECK-NEXT: [[TMP58:%.*]] = load i8, i8* [[ARRAYIDX25_3]], align 1 -; CHECK-NEXT: [[CONV26_3:%.*]] = zext i8 [[TMP58]] to i32 -; CHECK-NEXT: [[ARRAYIDX27_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 6 -; CHECK-NEXT: [[TMP59:%.*]] = load i8, i8* [[ARRAYIDX27_3]], align 1 -; CHECK-NEXT: [[CONV28_3:%.*]] = zext i8 [[TMP59]] to i32 -; CHECK-NEXT: [[SUB29_3:%.*]] = sub nsw i32 [[CONV26_3]], [[CONV28_3]] -; CHECK-NEXT: [[SHL30_3:%.*]] = shl nsw i32 [[SUB29_3]], 16 -; CHECK-NEXT: [[ADD31_3:%.*]] = add nsw i32 [[SHL30_3]], [[SUB24_3]] -; CHECK-NEXT: [[ARRAYIDX32_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 3 -; CHECK-NEXT: [[TMP60:%.*]] = load i8, i8* [[ARRAYIDX32_3]], align 1 -; CHECK-NEXT: [[CONV33_3:%.*]] = zext i8 [[TMP60]] to i32 -; CHECK-NEXT: [[ARRAYIDX34_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 3 -; CHECK-NEXT: [[TMP61:%.*]] = load i8, i8* [[ARRAYIDX34_3]], align 1 -; CHECK-NEXT: [[CONV35_3:%.*]] = zext i8 [[TMP61]] to i32 -; CHECK-NEXT: [[SUB36_3:%.*]] = sub nsw i32 [[CONV33_3]], [[CONV35_3]] -; CHECK-NEXT: [[ARRAYIDX37_3:%.*]] = 
getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 7 -; CHECK-NEXT: [[TMP62:%.*]] = load i8, i8* [[ARRAYIDX37_3]], align 1 -; CHECK-NEXT: [[CONV38_3:%.*]] = zext i8 [[TMP62]] to i32 -; CHECK-NEXT: [[ARRAYIDX39_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 7 -; CHECK-NEXT: [[TMP63:%.*]] = load i8, i8* [[ARRAYIDX39_3]], align 1 -; CHECK-NEXT: [[CONV40_3:%.*]] = zext i8 [[TMP63]] to i32 -; CHECK-NEXT: [[SUB41_3:%.*]] = sub nsw i32 [[CONV38_3]], [[CONV40_3]] -; CHECK-NEXT: [[SHL42_3:%.*]] = shl nsw i32 [[SUB41_3]], 16 -; CHECK-NEXT: [[ADD43_3:%.*]] = add nsw i32 [[SHL42_3]], [[SUB36_3]] -; CHECK-NEXT: [[ADD44_3:%.*]] = add nsw i32 [[ADD19_3]], [[ADD_3]] -; CHECK-NEXT: [[SUB45_3:%.*]] = sub nsw i32 [[ADD_3]], [[ADD19_3]] -; CHECK-NEXT: [[ADD46_3:%.*]] = add nsw i32 [[ADD43_3]], [[ADD31_3]] -; CHECK-NEXT: [[SUB47_3:%.*]] = sub nsw i32 [[ADD31_3]], [[ADD43_3]] -; CHECK-NEXT: [[ADD48_3:%.*]] = add nsw i32 [[ADD46_3]], [[ADD44_3]] -; CHECK-NEXT: [[SUB51_3:%.*]] = sub nsw i32 [[ADD44_3]], [[ADD46_3]] -; CHECK-NEXT: [[ADD55_3:%.*]] = add nsw i32 [[SUB47_3]], [[SUB45_3]] -; CHECK-NEXT: [[SUB59_3:%.*]] = sub nsw i32 [[SUB45_3]], [[SUB47_3]] -; CHECK-NEXT: [[ADD78:%.*]] = add nsw i32 [[ADD48_1]], [[ADD48]] -; CHECK-NEXT: [[SUB86:%.*]] = sub nsw i32 [[ADD48]], [[ADD48_1]] -; CHECK-NEXT: [[ADD94:%.*]] = add nsw i32 [[ADD48_3]], [[ADD48_2]] -; CHECK-NEXT: [[SUB102:%.*]] = sub nsw i32 [[ADD48_2]], [[ADD48_3]] -; CHECK-NEXT: [[ADD103:%.*]] = add nsw i32 [[ADD94]], [[ADD78]] -; CHECK-NEXT: [[SUB104:%.*]] = sub nsw i32 [[ADD78]], [[ADD94]] -; CHECK-NEXT: [[ADD105:%.*]] = add nsw i32 [[SUB102]], [[SUB86]] -; CHECK-NEXT: [[SUB106:%.*]] = sub nsw i32 [[SUB86]], [[SUB102]] -; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[ADD103]], 15 -; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537 -; CHECK-NEXT: [[MUL_I:%.*]] = mul nuw i32 [[AND_I]], 65535 -; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]] -; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[MUL_I]] -; CHECK-NEXT: 
[[SHR_I184:%.*]] = lshr i32 [[ADD105]], 15 -; CHECK-NEXT: [[AND_I185:%.*]] = and i32 [[SHR_I184]], 65537 -; CHECK-NEXT: [[MUL_I186:%.*]] = mul nuw i32 [[AND_I185]], 65535 -; CHECK-NEXT: [[ADD_I187:%.*]] = add i32 [[MUL_I186]], [[ADD105]] -; CHECK-NEXT: [[XOR_I188:%.*]] = xor i32 [[ADD_I187]], [[MUL_I186]] -; CHECK-NEXT: [[SHR_I189:%.*]] = lshr i32 [[SUB104]], 15 -; CHECK-NEXT: [[AND_I190:%.*]] = and i32 [[SHR_I189]], 65537 -; CHECK-NEXT: [[MUL_I191:%.*]] = mul nuw i32 [[AND_I190]], 65535 -; CHECK-NEXT: [[ADD_I192:%.*]] = add i32 [[MUL_I191]], [[SUB104]] -; CHECK-NEXT: [[XOR_I193:%.*]] = xor i32 [[ADD_I192]], [[MUL_I191]] -; CHECK-NEXT: [[SHR_I194:%.*]] = lshr i32 [[SUB106]], 15 -; CHECK-NEXT: [[AND_I195:%.*]] = and i32 [[SHR_I194]], 65537 -; CHECK-NEXT: [[MUL_I196:%.*]] = mul nuw i32 [[AND_I195]], 65535 -; CHECK-NEXT: [[ADD_I197:%.*]] = add i32 [[MUL_I196]], [[SUB106]] -; CHECK-NEXT: [[XOR_I198:%.*]] = xor i32 [[ADD_I197]], [[MUL_I196]] -; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I188]], [[XOR_I]] -; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I193]] -; CHECK-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I198]] -; CHECK-NEXT: [[ADD78_1:%.*]] = add nsw i32 [[ADD55_1]], [[ADD55]] -; CHECK-NEXT: [[SUB86_1:%.*]] = sub nsw i32 [[ADD55]], [[ADD55_1]] -; CHECK-NEXT: [[ADD94_1:%.*]] = add nsw i32 [[ADD55_3]], [[ADD55_2]] -; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 [[ADD55_2]], [[ADD55_3]] -; CHECK-NEXT: [[ADD103_1:%.*]] = add nsw i32 [[ADD94_1]], [[ADD78_1]] -; CHECK-NEXT: [[SUB104_1:%.*]] = sub nsw i32 [[ADD78_1]], [[ADD94_1]] -; CHECK-NEXT: [[ADD105_1:%.*]] = add nsw i32 [[SUB102_1]], [[SUB86_1]] -; CHECK-NEXT: [[SUB106_1:%.*]] = sub nsw i32 [[SUB86_1]], [[SUB102_1]] -; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[ADD103_1]], 15 -; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537 -; CHECK-NEXT: [[MUL_I_1:%.*]] = mul nuw i32 [[AND_I_1]], 65535 -; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]] -; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 
[[ADD_I_1]], [[MUL_I_1]] -; CHECK-NEXT: [[SHR_I184_1:%.*]] = lshr i32 [[ADD105_1]], 15 -; CHECK-NEXT: [[AND_I185_1:%.*]] = and i32 [[SHR_I184_1]], 65537 -; CHECK-NEXT: [[MUL_I186_1:%.*]] = mul nuw i32 [[AND_I185_1]], 65535 -; CHECK-NEXT: [[ADD_I187_1:%.*]] = add i32 [[MUL_I186_1]], [[ADD105_1]] -; CHECK-NEXT: [[XOR_I188_1:%.*]] = xor i32 [[ADD_I187_1]], [[MUL_I186_1]] -; CHECK-NEXT: [[SHR_I189_1:%.*]] = lshr i32 [[SUB104_1]], 15 -; CHECK-NEXT: [[AND_I190_1:%.*]] = and i32 [[SHR_I189_1]], 65537 -; CHECK-NEXT: [[MUL_I191_1:%.*]] = mul nuw i32 [[AND_I190_1]], 65535 -; CHECK-NEXT: [[ADD_I192_1:%.*]] = add i32 [[MUL_I191_1]], [[SUB104_1]] -; CHECK-NEXT: [[XOR_I193_1:%.*]] = xor i32 [[ADD_I192_1]], [[MUL_I191_1]] -; CHECK-NEXT: [[SHR_I194_1:%.*]] = lshr i32 [[SUB106_1]], 15 -; CHECK-NEXT: [[AND_I195_1:%.*]] = and i32 [[SHR_I194_1]], 65537 -; CHECK-NEXT: [[MUL_I196_1:%.*]] = mul nuw i32 [[AND_I195_1]], 65535 -; CHECK-NEXT: [[ADD_I197_1:%.*]] = add i32 [[MUL_I196_1]], [[SUB106_1]] -; CHECK-NEXT: [[XOR_I198_1:%.*]] = xor i32 [[ADD_I197_1]], [[MUL_I196_1]] -; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I188_1]], [[ADD113]] -; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]] -; CHECK-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I193_1]] -; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I198_1]] -; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 [[SUB51_1]], [[SUB51]] -; CHECK-NEXT: [[SUB86_2:%.*]] = sub nsw i32 [[SUB51]], [[SUB51_1]] -; CHECK-NEXT: [[ADD94_2:%.*]] = add nsw i32 [[SUB51_3]], [[SUB51_2]] -; CHECK-NEXT: [[SUB102_2:%.*]] = sub nsw i32 [[SUB51_2]], [[SUB51_3]] -; CHECK-NEXT: [[ADD103_2:%.*]] = add nsw i32 [[ADD94_2]], [[ADD78_2]] -; CHECK-NEXT: [[SUB104_2:%.*]] = sub nsw i32 [[ADD78_2]], [[ADD94_2]] -; CHECK-NEXT: [[ADD105_2:%.*]] = add nsw i32 [[SUB102_2]], [[SUB86_2]] -; CHECK-NEXT: [[SUB106_2:%.*]] = sub nsw i32 [[SUB86_2]], [[SUB102_2]] -; CHECK-NEXT: [[SHR_I_2:%.*]] = lshr i32 [[ADD103_2]], 15 -; CHECK-NEXT: [[AND_I_2:%.*]] = 
and i32 [[SHR_I_2]], 65537 -; CHECK-NEXT: [[MUL_I_2:%.*]] = mul nuw i32 [[AND_I_2]], 65535 -; CHECK-NEXT: [[ADD_I_2:%.*]] = add i32 [[MUL_I_2]], [[ADD103_2]] -; CHECK-NEXT: [[XOR_I_2:%.*]] = xor i32 [[ADD_I_2]], [[MUL_I_2]] -; CHECK-NEXT: [[SHR_I184_2:%.*]] = lshr i32 [[ADD105_2]], 15 -; CHECK-NEXT: [[AND_I185_2:%.*]] = and i32 [[SHR_I184_2]], 65537 -; CHECK-NEXT: [[MUL_I186_2:%.*]] = mul nuw i32 [[AND_I185_2]], 65535 -; CHECK-NEXT: [[ADD_I187_2:%.*]] = add i32 [[MUL_I186_2]], [[ADD105_2]] -; CHECK-NEXT: [[XOR_I188_2:%.*]] = xor i32 [[ADD_I187_2]], [[MUL_I186_2]] -; CHECK-NEXT: [[SHR_I189_2:%.*]] = lshr i32 [[SUB104_2]], 15 -; CHECK-NEXT: [[AND_I190_2:%.*]] = and i32 [[SHR_I189_2]], 65537 -; CHECK-NEXT: [[MUL_I191_2:%.*]] = mul nuw i32 [[AND_I190_2]], 65535 -; CHECK-NEXT: [[ADD_I192_2:%.*]] = add i32 [[MUL_I191_2]], [[SUB104_2]] -; CHECK-NEXT: [[XOR_I193_2:%.*]] = xor i32 [[ADD_I192_2]], [[MUL_I191_2]] -; CHECK-NEXT: [[SHR_I194_2:%.*]] = lshr i32 [[SUB106_2]], 15 -; CHECK-NEXT: [[AND_I195_2:%.*]] = and i32 [[SHR_I194_2]], 65537 -; CHECK-NEXT: [[MUL_I196_2:%.*]] = mul nuw i32 [[AND_I195_2]], 65535 -; CHECK-NEXT: [[ADD_I197_2:%.*]] = add i32 [[MUL_I196_2]], [[SUB106_2]] -; CHECK-NEXT: [[XOR_I198_2:%.*]] = xor i32 [[ADD_I197_2]], [[MUL_I196_2]] -; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I188_2]], [[ADD113_1]] -; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[XOR_I_2]] -; CHECK-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[XOR_I193_2]] -; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I198_2]] -; CHECK-NEXT: [[ADD78_3:%.*]] = add nsw i32 [[SUB59_1]], [[SUB59]] -; CHECK-NEXT: [[SUB86_3:%.*]] = sub nsw i32 [[SUB59]], [[SUB59_1]] -; CHECK-NEXT: [[ADD94_3:%.*]] = add nsw i32 [[SUB59_3]], [[SUB59_2]] -; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 [[SUB59_2]], [[SUB59_3]] -; CHECK-NEXT: [[ADD103_3:%.*]] = add nsw i32 [[ADD94_3]], [[ADD78_3]] -; CHECK-NEXT: [[SUB104_3:%.*]] = sub nsw i32 [[ADD78_3]], [[ADD94_3]] -; CHECK-NEXT: [[ADD105_3:%.*]] = add 
nsw i32 [[SUB102_3]], [[SUB86_3]] -; CHECK-NEXT: [[SUB106_3:%.*]] = sub nsw i32 [[SUB86_3]], [[SUB102_3]] -; CHECK-NEXT: [[SHR_I_3:%.*]] = lshr i32 [[ADD103_3]], 15 -; CHECK-NEXT: [[AND_I_3:%.*]] = and i32 [[SHR_I_3]], 65537 -; CHECK-NEXT: [[MUL_I_3:%.*]] = mul nuw i32 [[AND_I_3]], 65535 -; CHECK-NEXT: [[ADD_I_3:%.*]] = add i32 [[MUL_I_3]], [[ADD103_3]] -; CHECK-NEXT: [[XOR_I_3:%.*]] = xor i32 [[ADD_I_3]], [[MUL_I_3]] -; CHECK-NEXT: [[SHR_I184_3:%.*]] = lshr i32 [[ADD105_3]], 15 -; CHECK-NEXT: [[AND_I185_3:%.*]] = and i32 [[SHR_I184_3]], 65537 -; CHECK-NEXT: [[MUL_I186_3:%.*]] = mul nuw i32 [[AND_I185_3]], 65535 -; CHECK-NEXT: [[ADD_I187_3:%.*]] = add i32 [[MUL_I186_3]], [[ADD105_3]] -; CHECK-NEXT: [[XOR_I188_3:%.*]] = xor i32 [[ADD_I187_3]], [[MUL_I186_3]] -; CHECK-NEXT: [[SHR_I189_3:%.*]] = lshr i32 [[SUB104_3]], 15 -; CHECK-NEXT: [[AND_I190_3:%.*]] = and i32 [[SHR_I189_3]], 65537 -; CHECK-NEXT: [[MUL_I191_3:%.*]] = mul nuw i32 [[AND_I190_3]], 65535 -; CHECK-NEXT: [[ADD_I192_3:%.*]] = add i32 [[MUL_I191_3]], [[SUB104_3]] -; CHECK-NEXT: [[XOR_I193_3:%.*]] = xor i32 [[ADD_I192_3]], [[MUL_I191_3]] -; CHECK-NEXT: [[SHR_I194_3:%.*]] = lshr i32 [[SUB106_3]], 15 -; CHECK-NEXT: [[AND_I195_3:%.*]] = and i32 [[SHR_I194_3]], 65537 -; CHECK-NEXT: [[MUL_I196_3:%.*]] = mul nuw i32 [[AND_I195_3]], 65535 -; CHECK-NEXT: [[ADD_I197_3:%.*]] = add i32 [[MUL_I196_3]], [[SUB106_3]] -; CHECK-NEXT: [[XOR_I198_3:%.*]] = xor i32 [[ADD_I197_3]], [[MUL_I196_3]] -; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I188_3]], [[ADD113_2]] -; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[XOR_I_3]] -; CHECK-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[XOR_I193_3]] -; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I198_3]] -; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[ADD113_3]], 65535 -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD113_3]], 16 +; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[ADD_PTR_2]] to <4 x i8>* +; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 
1 +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP17]], <4 x i8> [[TMP25]], <16 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> +; CHECK-NEXT: [[TMP31:%.*]] = zext <16 x i8> [[TMP30]] to <16 x i32> +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8* [[ADD_PTR64_2]] to <4 x i8>* +; CHECK-NEXT: [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> [[TMP33]], <16 x i32> +; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32> +; CHECK-NEXT: [[TMP40:%.*]] = sub nsw <16 x i32> [[TMP31]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = bitcast i8* [[ARRAYIDX3_3]] to <4 x i8>* +; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i8>, <4 x i8>* [[TMP41]], align 1 +; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> [[TMP42]], <16 x i32> +; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> +; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> +; CHECK-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 
x i32> +; CHECK-NEXT: [[TMP49:%.*]] = bitcast i8* [[ARRAYIDX5_3]] to <4 x i8>* +; CHECK-NEXT: [[TMP50:%.*]] = load <4 x i8>, <4 x i8>* [[TMP49]], align 1 +; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP23]], <4 x i8> [[TMP50]], <16 x i32> +; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i8> [[TMP51]], <16 x i8> [[TMP52]], <16 x i32> +; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i8> [[TMP53]], <16 x i8> [[TMP54]], <16 x i32> +; CHECK-NEXT: [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i32> +; CHECK-NEXT: [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP48]], [[TMP56]] +; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], +; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP40]] +; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> undef, <16 x i32> +; CHECK-NEXT: [[TMP61:%.*]] = sub nsw <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP60]] +; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> +; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP62]], <16 x i32> [[TMP61]], <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = sub nsw <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP64]] +; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP66]], <16 x i32> +; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP65]], <16 x i32> +; CHECK-NEXT: [[TMP69:%.*]] = sub nsw <16 x i32> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP70:%.*]] = add nsw <16 x i32> [[TMP67]], [[TMP68]] +; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <16 x i32> [[TMP69]], <16 x i32> [[TMP70]], <16 x i32> +; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x 
i32> [[TMP69]], <16 x i32> +; CHECK-NEXT: [[TMP73:%.*]] = add nsw <16 x i32> [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP74:%.*]] = sub nsw <16 x i32> [[TMP71]], [[TMP72]] +; CHECK-NEXT: [[TMP75:%.*]] = shufflevector <16 x i32> [[TMP73]], <16 x i32> [[TMP74]], <16 x i32> +; CHECK-NEXT: [[TMP76:%.*]] = lshr <16 x i32> [[TMP75]], +; CHECK-NEXT: [[TMP77:%.*]] = and <16 x i32> [[TMP76]], +; CHECK-NEXT: [[TMP78:%.*]] = mul nuw <16 x i32> [[TMP77]], +; CHECK-NEXT: [[TMP79:%.*]] = add <16 x i32> [[TMP78]], [[TMP75]] +; CHECK-NEXT: [[TMP80:%.*]] = xor <16 x i32> [[TMP79]], [[TMP78]] +; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP80]]) +; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP81]], 65535 +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP81]], 16 ; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]] ; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 ; CHECK-NEXT: ret i32 [[SHR120]]