Index: llvm/trunk/include/llvm/Analysis/VectorUtils.h =================================================================== --- llvm/trunk/include/llvm/Analysis/VectorUtils.h +++ llvm/trunk/include/llvm/Analysis/VectorUtils.h @@ -15,6 +15,7 @@ #define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/MapVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -121,7 +122,7 @@ /// /// If the optional TargetTransformInfo is provided, this function tries harder /// to do less work by only looking at illegal types. -DenseMap +MapVector computeMinimumValueSizes(ArrayRef Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr); Index: llvm/trunk/lib/Analysis/VectorUtils.cpp =================================================================== --- llvm/trunk/lib/Analysis/VectorUtils.cpp +++ llvm/trunk/lib/Analysis/VectorUtils.cpp @@ -438,7 +438,7 @@ return InsertEltInst->getOperand(1); } -DenseMap +MapVector llvm::computeMinimumValueSizes(ArrayRef Blocks, DemandedBits &DB, const TargetTransformInfo *TTI) { @@ -451,7 +451,7 @@ SmallPtrSet Visited; DenseMap DBits; SmallPtrSet InstructionSet; - DenseMap MinBWs; + MapVector MinBWs; // Determine the roots. We work bottom-up, from truncs or icmps. bool SeenExtFromIllegalType = false; @@ -497,7 +497,7 @@ // If we encounter a type that is larger than 64 bits, we can't represent // it so bail out. if (DB.getDemandedBits(I).getBitWidth() > 64) - return DenseMap(); + return MapVector(); uint64_t V = DB.getDemandedBits(I).getZExtValue(); DBits[Leader] |= V; Index: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -325,7 +325,7 @@ // can be validly truncated to. The cost model has assumed this truncation // will happen when vectorizing. void vectorize(LoopVectorizationLegality *L, - DenseMap MinimumBitWidths) { + MapVector MinimumBitWidths) { MinBWs = MinimumBitWidths; Legal = L; // Create a new empty loop. Unlink the old loop and connect the new one. @@ -546,7 +546,7 @@ /// Map of scalar integer values to the smallest bitwidth they can be legally /// represented as. The vector equivalents of these values should be truncated /// to this type. - DenseMap MinBWs; + MapVector MinBWs; LoopVectorizationLegality *Legal; // Record whether runtime check is added. @@ -1505,7 +1505,7 @@ /// Map of scalar integer values to the smallest bitwidth they can be legally /// represented as. The vector equivalents of these values should be truncated /// to this type. - DenseMap MinBWs; + MapVector MinBWs; /// The loop that we evaluate. Loop *TheLoop; Index: llvm/trunk/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll +++ llvm/trunk/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll @@ -0,0 +1,54 @@ +; RUN: opt -S < %s -loop-vectorize -instcombine 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64" + +;; See https://llvm.org/bugs/show_bug.cgi?id=25490 +;; Due to the data structures used, the LLVM IR was not determinisic. +;; This test comes from the PR. + +;; CHECK-LABEL: @test( +; CHECK: load <16 x i8> +; CHECK-NEXT: getelementptr +; CHECK-NEXT: bitcast +; CHECK-NEXT: load <16 x i8> +; CHECK-NEXT: zext <16 x i8> +; CHECK-NEXT: zext <16 x i8> +define void @test(i32 %n, i8* nocapture %a, i8* nocapture %b, i8* nocapture readonly %c) { +entry: + %cmp.28 = icmp eq i32 %n, 0 + br i1 %cmp.28, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] + %arrayidx = getelementptr inbounds i8, i8* %c, i64 %indvars.iv + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv + %1 = load i8, i8* %arrayidx2, align 1 + %conv3 = zext i8 %1 to i32 + %mul = mul nuw nsw i32 %conv3, %conv + %shr.26 = lshr i32 %mul, 8 + %conv4 = trunc i32 %shr.26 to i8 + store i8 %conv4, i8* %arrayidx2, align 1 + %arrayidx8 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv + %2 = load i8, i8* %arrayidx8, align 1 + %conv9 = zext i8 %2 to i32 + %mul10 = mul nuw nsw i32 %conv9, %conv + %shr11.27 = lshr i32 %mul10, 8 + %conv12 = trunc i32 %shr11.27 to i8 + store i8 %conv12, i8* %arrayidx8, align 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +}