The majority of stripPointerCastsAndOffsets() queries can be resolved
after checking 1 or 2 values. This patch delays the creation and
insertion into a SmallPtrSet until we have to check more than that. This
slightly improves compile time in my benchmarks.
Details
Diff Detail
- Repository
- rL LLVM
Event Timeline
Ugh. It's pretty sad that the optimizer doesn't fix this.
lib/IR/Value.cpp | ||
---|---|---|
424 | Maybe use a lambda instead? |
I recall also seeing this problem recently in a different context, and it looked like the reason why this mattered was because the constructor ends up zeroing the stack array (instead of leaving it uninitialized).
If we can fix the class implementation, or the optimizer, that would be better. It looks like there are two problems here, one is the memset (which is what I recall seeing in the profile), and the other is that we don't inline the destructor.
Specifically:
$ cat /tmp/q1.cpp #include <llvm/ADT/SmallPtrSet.h> #include <llvm/IR/Type.h> using namespace llvm; bool foo(SmallPtrSetImpl<Type*> *Visited); bool isSized() { SmallPtrSet<Type *, 4> V; return foo(&V); } $ clang++ -std=gnu++11 -O3 -S -emit-llvm -Iinclude -I/build/ppc64/llvm/include --gcc-toolchain=/install/ppc64/gcc -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -o - /tmp/q1.cpp -fno-exceptions ... define zeroext i1 @_Z7isSizedv() #0 { entry: %V = alloca %"class.llvm::SmallPtrSet", align 8 %0 = bitcast %"class.llvm::SmallPtrSet"* %V to i8* call void @llvm.lifetime.start(i64 64, i8* %0) #3 %arraydecay.i = getelementptr inbounds %"class.llvm::SmallPtrSet", %"class.llvm::SmallPtrSet"* %V, i64 0, i32 1, i64 0 %SmallArray.i.i.i = getelementptr inbounds %"class.llvm::SmallPtrSet", %"class.llvm::SmallPtrSet"* %V, i64 0, i32 0, i32 0, i32 0 store i8** %arraydecay.i, i8*** %SmallArray.i.i.i, align 8, !tbaa !1 %1 = getelementptr inbounds %"class.llvm::SmallPtrSet", %"class.llvm::SmallPtrSet"* %V, i64 0, i32 0, i32 0, i32 1 store i8** %arraydecay.i, i8*** %1, align 8, !tbaa !7 %2 = getelementptr inbounds %"class.llvm::SmallPtrSet", %"class.llvm::SmallPtrSet"* %V, i64 0, i32 0, i32 0, i32 2 store i32 4, i32* %2, align 8, !tbaa !8 %3 = bitcast i8** %arraydecay.i to i8* %4 = getelementptr inbounds %"class.llvm::SmallPtrSet", %"class.llvm::SmallPtrSet"* %V, i64 0, i32 0, i32 0, i32 3 call void @llvm.memset.p0i8.i64(i8* %3, i8 -1, i64 32, i32 8, i1 false) #3 store i32 0, i32* %4, align 4, !tbaa !9 %5 = getelementptr inbounds %"class.llvm::SmallPtrSet", %"class.llvm::SmallPtrSet"* %V, i64 0, i32 0, i32 0, i32 4 store i32 0, i32* %5, align 8, !tbaa !10 %6 = bitcast %"class.llvm::SmallPtrSet"* %V to %"class.llvm::SmallPtrSetImpl"* %call = call zeroext i1 @_Z3fooPN4llvm15SmallPtrSetImplIPNS_4TypeEEE(%"class.llvm::SmallPtrSetImpl"* %6) #3 %7 = bitcast %"class.llvm::SmallPtrSet"* %V to %"class.llvm::SmallPtrSetImplBase"* call void @_ZN4llvm19SmallPtrSetImplBaseD2Ev(%"class.llvm::SmallPtrSetImplBase"* nonnull %7) #3 call void @llvm.lifetime.end(i64 64, i8* %0) #3 ret i1 %call } ...
Consider, on the other hand, what happens with a SmallVector of the same type:
$ cat /tmp/q2.cpp #include <llvm/ADT/SmallVector.h> #include <llvm/IR/Type.h> using namespace llvm; bool foo(SmallVectorImpl<Type*> *Visited); bool isSized() { SmallVector<Type *, 4> V; return foo(&V); } $ clang ... /tmp/q2.cpp ... ... define zeroext i1 @_Z7isSizedv() #0 { entry: %V = alloca %"class.llvm::SmallVector", align 8 %0 = bitcast %"class.llvm::SmallVector"* %V to i8* call void @llvm.lifetime.start(i64 56, i8* %0) #3 %1 = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %V, i64 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 0 %BeginX.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %V, i64 0, i32 0, i32 0, i32 0, i32 0, i32 0 store i8* %1, i8** %BeginX.i.i.i.i.i, align 8, !tbaa !1 %EndX.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %V, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1 store i8* %1, i8** %EndX.i.i.i.i.i, align 8, !tbaa !6 %CapacityX.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %V, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2 %add.ptr.i.i.i.i.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %V, i64 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i64 32 store i8* %add.ptr.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i, align 8, !tbaa !7 %2 = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %V, i64 0, i32 0 %call = call zeroext i1 @_Z3fooPN4llvm15SmallVectorImplIPNS_4TypeEEE(%"class.llvm::SmallVectorImpl"* %2) #3 %3 = load i8*, i8** %BeginX.i.i.i.i.i, align 8, !tbaa !1 %cmp.i.i = icmp eq i8* %3, %1 br i1 %cmp.i.i, label %_ZN4llvm15SmallVectorImplIPNS_4TypeEED2Ev.exit, label %if.then.i if.then.i: ; preds = %entry call void @free(i8* %3) #3 br label %_ZN4llvm15SmallVectorImplIPNS_4TypeEED2Ev.exit _ZN4llvm15SmallVectorImplIPNS_4TypeEED2Ev.exit: ; preds = %entry, %if.then.i call void @llvm.lifetime.end(i64 56, i8* %0) #3 ret i1 %call } ...
Here we have no memset and the destructor is inlined.
I just posted patches that
I just posted reviews that should avoid the memset in the small SmallPtrSet cases. I can't measure any signifant differences with it, so we can abandon this (assuming the SmallPtrSet changes get approved).
...
I just posted reviews that should avoid the memset in the small SmallPtrSet cases. I can't measure any signifant differences with it, so we can abandon this (assuming the SmallPtrSet changes get approved).
Great, thanks!
Maybe use a lambda instead?