Skip to content

Commit

Permalink
[LoopVectorize] Use MapVector rather than DenseMap for MinBWs.
Browse files Browse the repository at this point in the history
The order in which instructions are truncated in truncateToMinimalBitwidths
effects code generation. Switch to a map with a determinisic order, since the
iteration order over a DenseMap is not defined.

This code is not hot, so the difference in container performance isn't
interesting.

Many thanks to David Blaikie for making me aware of MapVector!

Fixes PR25490.

Differential Revision: http://reviews.llvm.org/D14981

llvm-svn: 254179
Charlie Turner committed Nov 26, 2015
1 parent a1b8fc3 commit 54336a5
Showing 4 changed files with 62 additions and 7 deletions.
3 changes: 2 additions & 1 deletion llvm/include/llvm/Analysis/VectorUtils.h
Original file line number Diff line number Diff line change
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
@@ -121,7 +122,7 @@ Value *getSplatValue(Value *V);
///
/// If the optional TargetTransformInfo is provided, this function tries harder
/// to do less work by only looking at illegal types.
DenseMap<Instruction*, uint64_t>
MapVector<Instruction*, uint64_t>
computeMinimumValueSizes(ArrayRef<BasicBlock*> Blocks,
DemandedBits &DB,
const TargetTransformInfo *TTI=nullptr);
6 changes: 3 additions & 3 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
@@ -438,7 +438,7 @@ llvm::Value *llvm::getSplatValue(Value *V) {
return InsertEltInst->getOperand(1);
}

DenseMap<Instruction *, uint64_t>
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {

@@ -451,7 +451,7 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
SmallPtrSet<Value *, 16> Visited;
DenseMap<Value *, uint64_t> DBits;
SmallPtrSet<Instruction *, 4> InstructionSet;
DenseMap<Instruction *, uint64_t> MinBWs;
MapVector<Instruction *, uint64_t> MinBWs;

// Determine the roots. We work bottom-up, from truncs or icmps.
bool SeenExtFromIllegalType = false;
@@ -497,7 +497,7 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
// If we encounter a type that is larger than 64 bits, we can't represent
// it so bail out.
if (DB.getDemandedBits(I).getBitWidth() > 64)
return DenseMap<Instruction *, uint64_t>();
return MapVector<Instruction *, uint64_t>();

uint64_t V = DB.getDemandedBits(I).getZExtValue();
DBits[Leader] |= V;
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
@@ -325,7 +325,7 @@ class InnerLoopVectorizer {
// can be validly truncated to. The cost model has assumed this truncation
// will happen when vectorizing.
void vectorize(LoopVectorizationLegality *L,
DenseMap<Instruction*,uint64_t> MinimumBitWidths) {
MapVector<Instruction*,uint64_t> MinimumBitWidths) {
MinBWs = MinimumBitWidths;
Legal = L;
// Create a new empty loop. Unlink the old loop and connect the new one.
@@ -546,7 +546,7 @@ class InnerLoopVectorizer {
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
DenseMap<Instruction*,uint64_t> MinBWs;
MapVector<Instruction*,uint64_t> MinBWs;
LoopVectorizationLegality *Legal;

// Record whether runtime check is added.
@@ -1505,7 +1505,7 @@ class LoopVectorizationCostModel {
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
DenseMap<Instruction*,uint64_t> MinBWs;
MapVector<Instruction*,uint64_t> MinBWs;

/// The loop that we evaluate.
Loop *TheLoop;
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; RUN: opt -S < %s -loop-vectorize -instcombine 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

;; See https://llvm.org/bugs/show_bug.cgi?id=25490
;; Due to the data structures used, the LLVM IR was not determinisic.
;; This test comes from the PR.

;; CHECK-LABEL: @test(
; CHECK: load <16 x i8>
; CHECK-NEXT: getelementptr
; CHECK-NEXT: bitcast
; CHECK-NEXT: load <16 x i8>
; CHECK-NEXT: zext <16 x i8>
; CHECK-NEXT: zext <16 x i8>
define void @test(i32 %n, i8* nocapture %a, i8* nocapture %b, i8* nocapture readonly %c) {
entry:
%cmp.28 = icmp eq i32 %n, 0
br i1 %cmp.28, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader: ; preds = %entry
br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void

for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
%arrayidx = getelementptr inbounds i8, i8* %c, i64 %indvars.iv
%0 = load i8, i8* %arrayidx, align 1
%conv = zext i8 %0 to i32
%arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
%1 = load i8, i8* %arrayidx2, align 1
%conv3 = zext i8 %1 to i32
%mul = mul nuw nsw i32 %conv3, %conv
%shr.26 = lshr i32 %mul, 8
%conv4 = trunc i32 %shr.26 to i8
store i8 %conv4, i8* %arrayidx2, align 1
%arrayidx8 = getelementptr inbounds i8, i8* %b, i64 %indvars.iv
%2 = load i8, i8* %arrayidx8, align 1
%conv9 = zext i8 %2 to i32
%mul10 = mul nuw nsw i32 %conv9, %conv
%shr11.27 = lshr i32 %mul10, 8
%conv12 = trunc i32 %shr11.27 to i8
store i8 %conv12, i8* %arrayidx8, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

0 comments on commit 54336a5

Please sign in to comment.