diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -6950,16 +6950,26 @@ BasicBlock *TargetBB = I->getParent(); bool Changed = false; SmallVector ToReplace; + Instruction *InsertPoint = I; + DenseMap InstOrdering; + unsigned long InstNumber = 0; + for (const auto &I : *TargetBB) + InstOrdering[&I] = InstNumber++; + for (Use *U : reverse(OpsToSink)) { auto *UI = cast(U->get()); - if (UI->getParent() == TargetBB || isa(UI)) + if (isa(UI)) + continue; + if (UI->getParent() == TargetBB) { + if (InstOrdering[UI] < InstOrdering[InsertPoint]) + InsertPoint = UI; continue; + } ToReplace.push_back(U); } SetVector MaybeDead; DenseMap NewInstructions; - Instruction *InsertPoint = I; for (Use *U : ToReplace) { auto *UI = cast(U->get()); Instruction *NI = UI->clone(); diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions-inseltpoison.ll --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions-inseltpoison.ll @@ -9,13 +9,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; CHECK-NEXT: ret <8 x i16> [[RES_1]] ; CHECK: if.else: -; CHECK-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +; CHECK-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> ; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] ; CHECK-NEXT: ret <8 x i16> [[RES_2]] ; @@ -39,13 
+39,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; CHECK-NEXT: ret <8 x i16> [[RES_1]] ; CHECK: if.else: -; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> ; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> ; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] ; CHECK-NEXT: ret <8 x i16> [[RES_2]] ; @@ -69,8 +69,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; CHECK-NEXT: ret <8 x i16> [[RES_1]] ; CHECK: if.else: @@ -96,8 +96,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; CHECK-NEXT: ret <8 x i16> [[RES_1]] ; CHECK: if.else: @@ -124,13 +124,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector 
<16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]]) ; CHECK-NEXT: ret <8 x i16> [[VMULL0]] ; CHECK: if.else: -; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]]) ; CHECK-NEXT: ret <8 x i16> [[VMULL1]] ; @@ -156,17 +156,17 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> -; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP1]], [[Z2]] ; CHECK-NEXT: ret <8 x i16> [[RES1]] ; CHECK: if.else: -; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> -; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP3]], [[Z4]] ; CHECK-NEXT: ret <8 x i16> [[RES2]] ; @@ -202,17 +202,17 @@ ; CHECK-NEXT: call void @user1(<8 x i16> [[Z3]]) ; CHECK-NEXT: br i1 
undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> -; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP1]], [[Z2]] ; CHECK-NEXT: ret <8 x i16> [[RES1]] ; CHECK: if.else: -; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> -; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> poison, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> +; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP3]], [[Z4]] ; CHECK-NEXT: ret <8 x i16> [[RES2]] ; diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/sink-free-instructions.ll @@ -9,13 +9,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; CHECK-NEXT: ret <8 x i16> [[RES_1]] ; CHECK: if.else: -; CHECK-NEXT: [[ZB_2:%.*]] = zext <8 x i8> 
[[B]] to <8 x i16> ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +; CHECK-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> ; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] ; CHECK-NEXT: ret <8 x i16> [[RES_2]] ; @@ -39,13 +39,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; CHECK-NEXT: ret <8 x i16> [[RES_1]] ; CHECK: if.else: -; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> ; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +; CHECK-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> ; CHECK-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] ; CHECK-NEXT: ret <8 x i16> [[RES_2]] ; @@ -69,8 +69,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; CHECK-NEXT: ret <8 x i16> [[RES_1]] ; CHECK: if.else: @@ -96,8 +96,8 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; CHECK-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; CHECK-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; CHECK-NEXT: ret <8 x i16> [[RES_1]] ; CHECK: if.else: @@ -124,13 +124,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label 
[[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[VMULL0:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP0]], <8 x i8> [[S2]]) ; CHECK-NEXT: ret <8 x i16> [[VMULL0]] ; CHECK: if.else: -; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[VMULL1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[TMP1]], <8 x i8> [[S4]]) ; CHECK-NEXT: ret <8 x i16> [[VMULL1]] ; @@ -156,17 +156,17 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> -; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP1]], [[Z2]] ; CHECK-NEXT: ret <8 x i16> [[RES1]] ; CHECK: if.else: -; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> -; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> +; 
CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP3]], [[Z4]] ; CHECK-NEXT: ret <8 x i16> [[RES2]] ; @@ -202,17 +202,17 @@ ; CHECK-NEXT: call void @user1(<8 x i16> [[Z3]]) ; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> -; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i16> +; CHECK-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; CHECK-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP1]], [[Z2]] ; CHECK-NEXT: ret <8 x i16> [[RES1]] ; CHECK: if.else: -; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> -; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> undef, <8 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[TMP2]] to <8 x i16> +; CHECK-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> +; CHECK-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; CHECK-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP3]], [[Z4]] ; CHECK-NEXT: ret <8 x i16> [[RES2]] ; @@ -273,3 +273,95 @@ ; Function Attrs: nounwind readnone declare <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8>, <8 x i8>) #2 + +; The insertelement must be inserted before the shufflevector that uses it; otherwise a 'does not dominate all uses' error will occur.
+define <4 x i32> @sink_insertelement(i16 %e, i8 %f) { +; CHECK-LABEL: @sink_insertelement( +; CHECK-NEXT: for.cond4.preheader.lr.ph: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[F:%.*]], 0 +; CHECK-NEXT: [[CONV25:%.*]] = sext i16 [[E:%.*]] to i32 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND4_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND4_PREHEADER_PREHEADER:%.*]] +; CHECK: for.cond4.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[CONV25]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT144:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i32> zeroinitializer, [[BROADCAST_SPLAT144]] +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; CHECK: for.cond4.preheader.preheader: +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; +for.cond4.preheader.lr.ph: + %cmp = icmp slt i8 %f, 0 + %conv25 = sext i16 %e to i32 + %broadcast.splatinsert143 = insertelement <4 x i32> poison, i32 %conv25, i32 0 + br i1 %cmp, label %for.cond4.preheader.us.preheader, label %for.cond4.preheader.preheader + +for.cond4.preheader.us.preheader: ; preds = %for.cond4.preheader.lr.ph + %broadcast.splat144 = shufflevector <4 x i32> %broadcast.splatinsert143, <4 x i32> poison, <4 x i32> zeroinitializer + %0 = mul <4 x i32> zeroinitializer, %broadcast.splat144 + ret <4 x i32> %0 + +for.cond4.preheader.preheader: ; preds = %for.cond4.preheader.lr.ph + ret <4 x i32> zeroinitializer +} + +define <4 x i32> @sinkadd_partial(<8 x i16> %a1, <8 x i16> %a2, i8 %f) { +; CHECK-LABEL: @sinkadd_partial( +; CHECK-NEXT: for.cond4.preheader.lr.ph: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[F:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND4_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND4_PREHEADER_PREHEADER:%.*]] +; CHECK: for.cond4.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[A1:%.*]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A2:%.*]], <8 
x i16> poison, <4 x i32> +; CHECK-NEXT: [[E1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> +; CHECK-NEXT: [[E2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[E1]], [[E2]] +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK: for.cond4.preheader.preheader: +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; +for.cond4.preheader.lr.ph: + %cmp = icmp slt i8 %f, 0 + %s2 = shufflevector <8 x i16> %a2, <8 x i16> poison, <4 x i32> + %s1 = shufflevector <8 x i16> %a1, <8 x i16> poison, <4 x i32> + br i1 %cmp, label %for.cond4.preheader.us.preheader, label %for.cond4.preheader.preheader + +for.cond4.preheader.us.preheader: ; preds = %for.cond4.preheader.lr.ph + %e1 = sext <4 x i16> %s1 to <4 x i32> + %e2 = sext <4 x i16> %s2 to <4 x i32> + %0 = add <4 x i32> %e1, %e2 + ret <4 x i32> %0 + +for.cond4.preheader.preheader: ; preds = %for.cond4.preheader.lr.ph + ret <4 x i32> zeroinitializer +} + +define <4 x i32> @sinkadd_partial_rev(<8 x i16> %a1, <8 x i16> %a2, i8 %f) { +; CHECK-LABEL: @sinkadd_partial_rev( +; CHECK-NEXT: for.cond4.preheader.lr.ph: +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[F:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND4_PREHEADER_US_PREHEADER:%.*]], label [[FOR_COND4_PREHEADER_PREHEADER:%.*]] +; CHECK: for.cond4.preheader.us.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[A1:%.*]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A2:%.*]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[E2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: [[E1:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[E1]], [[E2]] +; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK: for.cond4.preheader.preheader: +; CHECK-NEXT: ret <4 x i32> zeroinitializer +; +for.cond4.preheader.lr.ph: + %cmp = icmp slt i8 %f, 0 + %s2 = shufflevector <8 x i16> %a2, <8 x i16> poison, <4 x i32> + %s1 = shufflevector <8 x i16> %a1, <8 x i16> poison, <4 x i32> + 
br i1 %cmp, label %for.cond4.preheader.us.preheader, label %for.cond4.preheader.preheader + +for.cond4.preheader.us.preheader: ; preds = %for.cond4.preheader.lr.ph + %e2 = sext <4 x i16> %s2 to <4 x i32> + %e1 = sext <4 x i16> %s1 to <4 x i32> + %0 = add <4 x i32> %e1, %e2 + ret <4 x i32> %0 + +for.cond4.preheader.preheader: ; preds = %for.cond4.preheader.lr.ph + ret <4 x i32> zeroinitializer +} diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions-inseltpoison.ll --- a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions-inseltpoison.ll @@ -7,13 +7,13 @@ ; NEON-NEXT: entry: ; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; NEON: if.then: -; NEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> ; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> ; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; NEON-NEXT: ret <8 x i16> [[RES_1]] ; NEON: if.else: -; NEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> ; NEON-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +; NEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> ; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] ; NEON-NEXT: ret <8 x i16> [[RES_2]] ; @@ -50,13 +50,13 @@ ; NEON-NEXT: entry: ; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; NEON: if.then: -; NEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; NEON-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; NEON-NEXT: ret <8 x i16> [[RES_1]] ; NEON: if.else: -; NEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> ; NEON-NEXT: 
[[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +; NEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> ; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] ; NEON-NEXT: ret <8 x i16> [[RES_2]] ; @@ -180,14 +180,14 @@ ; NEON-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; NEON: if.then: ; NEON-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> poison, <8 x i32> -; NEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16> +; NEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; NEON-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]] ; NEON-NEXT: ret <8 x i16> [[RES1]] ; NEON: if.else: ; NEON-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> poison, <8 x i32> -; NEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; NEON-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; NEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; NEON-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]] ; NEON-NEXT: ret <8 x i16> [[RES2]] ; diff --git a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll --- a/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll +++ b/llvm/test/Transforms/CodeGenPrepare/ARM/sink-free-instructions.ll @@ -7,13 +7,13 @@ ; NEON-NEXT: entry: ; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; NEON: if.then: -; NEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> ; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: [[ZB_1:%.*]] = zext <8 x i8> [[B:%.*]] to <8 x i16> ; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; NEON-NEXT: ret <8 x i16> [[RES_1]] ; NEON: if.else: -; NEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> ; NEON-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[A]] to <8 x i16> +; NEON-NEXT: [[ZB_2:%.*]] = zext <8 x i8> [[B]] to <8 x i16> ; 
NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] ; NEON-NEXT: ret <8 x i16> [[RES_2]] ; @@ -50,13 +50,13 @@ ; NEON-NEXT: entry: ; NEON-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; NEON: if.then: -; NEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; NEON-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[A:%.*]] to <8 x i16> +; NEON-NEXT: [[ZB_1:%.*]] = sext <8 x i8> [[B:%.*]] to <8 x i16> ; NEON-NEXT: [[RES_1:%.*]] = add <8 x i16> [[TMP0]], [[ZB_1]] ; NEON-NEXT: ret <8 x i16> [[RES_1]] ; NEON: if.else: -; NEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> ; NEON-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[A]] to <8 x i16> +; NEON-NEXT: [[ZB_2:%.*]] = sext <8 x i8> [[B]] to <8 x i16> ; NEON-NEXT: [[RES_2:%.*]] = sub <8 x i16> [[TMP1]], [[ZB_2]] ; NEON-NEXT: ret <8 x i16> [[RES_2]] ; @@ -180,14 +180,14 @@ ; NEON-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; NEON: if.then: ; NEON-NEXT: [[S2:%.*]] = shufflevector <16 x i8> [[B:%.*]], <16 x i8> undef, <8 x i32> -; NEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; NEON-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[S1]] to <8 x i16> +; NEON-NEXT: [[Z2:%.*]] = zext <8 x i8> [[S2]] to <8 x i16> ; NEON-NEXT: [[RES1:%.*]] = add <8 x i16> [[TMP0]], [[Z2]] ; NEON-NEXT: ret <8 x i16> [[RES1]] ; NEON: if.else: ; NEON-NEXT: [[S4:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> undef, <8 x i32> -; NEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; NEON-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[S3]] to <8 x i16> +; NEON-NEXT: [[Z4:%.*]] = sext <8 x i8> [[S4]] to <8 x i16> ; NEON-NEXT: [[RES2:%.*]] = sub <8 x i16> [[TMP1]], [[Z4]] ; NEON-NEXT: ret <8 x i16> [[RES2]] ;