Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11632,6 +11632,9 @@ if (!Shuffle || !Shuffle->isZeroEltSplat()) continue; + if (Shuffle->getParent() == I->getParent()) + continue; + Value *ShuffleOperand = Shuffle->getOperand(0); InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand); if (!Insert) Index: llvm/test/CodeGen/AArch64/avoid-sinking-mul-ops.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/avoid-sinking-mul-ops.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -codegenprepare < %s -S -o -| FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@e = external dso_local local_unnamed_addr global i64, align 8 +@f = external dso_local local_unnamed_addr global i64, align 8 +@b = external dso_local local_unnamed_addr global i16, align 2 +@c = external dso_local local_unnamed_addr global i32, align 4 +@d = external dso_local local_unnamed_addr global i64, align 8 + +; Function Attrs: nofree norecurse nounwind +define dso_local i32 @main() local_unnamed_addr #0 { +; CHECK-LABEL: @main( +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @e, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP7:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* @f, align 8 +; CHECK-NEXT: br label [[TMP5:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP6]], label [[TMP5]], label [[TMP46:%.*]] +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* @b, align 2 +; CHECK-NEXT: [[TMP9:%.*]] = load i64, i64* @d, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = sext i16 [[TMP8]] to i32 +; 
CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> undef, i32 [[TMP10]], i32 0 +; CHECK-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = mul i32 [[TMP12]], [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], [[TMP10]] +; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[TMP14]], [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[TMP16]], [[TMP10]] +; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[TMP17]], [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], [[TMP10]] +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> undef, i32 [[TMP19]], i64 0 +; CHECK-NEXT: [[TMP21:%.*]] = icmp ult i64 [[TMP9]], 10 +; CHECK-NEXT: br i1 [[TMP21]], label [[TMP22:%.*]], label [[TMP23:%.*]] +; CHECK: 22: +; CHECK-NEXT: store i64 10, i64* @d, align 8 +; CHECK-NEXT: br label [[TMP23]] +; CHECK: 23: +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP25:%.*]] = mul <4 x i32> [[TMP20]], [[TMP24]] +; CHECK-NEXT: [[TMP26:%.*]] = mul <4 x i32> [[TMP25]], [[TMP24]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <4 x i32> [[TMP26]], [[TMP24]] +; CHECK-NEXT: [[TMP28:%.*]] = mul <4 x i32> [[TMP27]], [[TMP24]] +; CHECK-NEXT: [[TMP29:%.*]] = mul <4 x i32> [[TMP28]], [[TMP24]] +; CHECK-NEXT: [[TMP30:%.*]] = mul <4 x i32> [[TMP29]], [[TMP24]] +; CHECK-NEXT: [[TMP31:%.*]] = mul <4 x i32> [[TMP30]], [[TMP24]] +; CHECK-NEXT: [[TMP32:%.*]] = mul <4 x i32> [[TMP31]], [[TMP24]] +; CHECK-NEXT: [[TMP33:%.*]] = mul <4 x i32> [[TMP32]], [[TMP24]] +; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i32> [[TMP33]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = mul nuw <4 x i32> [[TMP34]], +; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <4 x i32> [[TMP35]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[TMP37:%.*]] = mul <4 x i32> [[TMP35]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <4 x i32> [[TMP37]], <4 x i32> undef, 
<4 x i32> +; CHECK-NEXT: [[TMP39:%.*]] = mul <4 x i32> [[TMP37]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <4 x i32> [[TMP39]], i32 0 +; CHECK-NEXT: [[TMP41:%.*]] = mul nuw i32 [[TMP40]], [[TMP10]] +; CHECK-NEXT: [[TMP42:%.*]] = mul nuw nsw i32 [[TMP41]], [[TMP10]] +; CHECK-NEXT: [[TMP43:%.*]] = mul nuw nsw i32 [[TMP42]], [[TMP10]] +; CHECK-NEXT: [[TMP44:%.*]] = mul nuw i32 [[TMP43]], [[TMP10]] +; CHECK-NEXT: [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP10]] +; CHECK-NEXT: store i32 [[TMP45]], i32* @c, align 4 +; CHECK-NEXT: br label [[TMP46]] +; CHECK: 46: +; CHECK-NEXT: ret i32 0 +; + %1 = load i64, i64* @e, align 8 + %2 = icmp eq i64 %1, 0 + br i1 %2, label %3, label %7 + +3: ; preds = %0 + %4 = load i64, i64* @f, align 8 + br label %5 + +5: ; preds = %5, %3 + %6 = icmp eq i64 %4, 0 + br i1 %6, label %5, label %46 + +7: ; preds = %0 + %8 = load i16, i16* @b, align 2 + %9 = load i64, i64* @d, align 8 + %10 = sext i16 %8 to i32 + %11 = insertelement <4 x i32> undef, i32 %10, i32 0 + %12 = mul nsw i32 %10, %10 + %13 = mul i32 %12, %10 + %14 = mul i32 %13, %10 + %15 = mul i32 %14, %10 + %16 = mul i32 %15, %10 + %17 = mul i32 %16, %10 + %18 = mul i32 %17, %10 + %19 = mul i32 %18, %10 + %20 = insertelement <4 x i32> undef, i32 %19, i64 0 + %21 = icmp ult i64 %9, 10 + br i1 %21, label %22, label %23 + +22: ; preds = %7 + store i64 10, i64* @d, align 8 + br label %23 + +23: ; preds = %22, %7 + %24 = shufflevector <4 x i32> %11, <4 x i32> undef, <4 x i32> zeroinitializer + %25 = mul <4 x i32> %20, %24 + %26 = mul <4 x i32> %25, %24 + %27 = mul <4 x i32> %26, %24 + %28 = mul <4 x i32> %27, %24 + %29 = mul <4 x i32> %28, %24 + %30 = mul <4 x i32> %29, %24 + %31 = mul <4 x i32> %30, %24 + %32 = mul <4 x i32> %31, %24 + %33 = mul <4 x i32> %32, %24 + %34 = shufflevector <4 x i32> %33, <4 x i32> poison, <4 x i32> zeroinitializer + %35 = mul nuw <4 x i32> %34, + %36 = shufflevector <4 x i32> %35, <4 x i32> undef, <4 x i32> + %37 = mul <4 x i32> %35, %36 + %38 = 
shufflevector <4 x i32> %37, <4 x i32> undef, <4 x i32> + %39 = mul <4 x i32> %37, %38 + %40 = extractelement <4 x i32> %39, i32 0 + %41 = mul nuw i32 %40, %10 + %42 = mul nuw nsw i32 %41, %10 + %43 = mul nuw nsw i32 %42, %10 + %44 = mul nuw i32 %43, %10 + %45 = mul i32 %44, %10 + store i32 %45, i32* @c, align 4 + br label %46 + +46: ; preds = %5, %23 + ret i32 0 +}