diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1924,6 +1924,28 @@
   if (Instruction *I = commonCastTransforms(CI))
     return I;
 
+  if (auto *LI = dyn_cast<LoadInst>(CI.getOperand(0))) {
+    // inttoptr/ptrtoint casts are Bad News, and in general, there is little
+    // we can do about them. However, casting a pointer to an integer is,
+    // arguably, less troubling than casting an integer to a pointer.
+    // So if we have an inttoptr(load), let's aggressively fold that to a
+    // load of a pointer, and adjust *all* users ourselves.
+    Builder.SetInsertPoint(LI);
+    LoadInst *NewLoad =
+        combineLoadToNewType(*LI, Type::getInt8PtrTy(LI->getContext()),
+                             LI->getName() + ".ptr");
+    replaceInstUsesWith(
+        CI, Builder.CreatePointerCast(NewLoad, CI.getType(), CI.getName()));
+    if (!LI->use_empty()) {
+      // The original load had other uses besides our original inttoptr cast,
+      // so we have to update them as well.
+      replaceInstUsesWith(*LI,
+                          Builder.CreatePointerCast(NewLoad, LI->getType()));
+    }
+    eraseInstFromFunction(*LI);
+    return nullptr;
+  }
+
   return nullptr;
 }
 
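The equivalence this fold relies on, namely that loading a pointer-sized
integer and inttoptr'ing it yields the same pointer as loading the slot
directly as a pointer, can be sketched outside of LLVM. A minimal
Python/ctypes model (illustrative only, not part of the patch; it assumes
size_t and pointers have the same size and representation, which holds on
common CPython targets):

    # Illustrative sketch, not part of the patch; assumes size_t is
    # pointer-sized.
    import ctypes

    x = ctypes.c_int(42)
    # A pointer value stored in memory: the "load" source.
    slot = ctypes.c_void_p(ctypes.addressof(x))

    # inttoptr(load ...): read the slot as a pointer-sized integer, then
    # treat that integer as a pointer value.
    as_int = ctypes.cast(ctypes.pointer(slot),
                         ctypes.POINTER(ctypes.c_size_t)).contents.value

    # load of a pointer: read the very same memory directly as a pointer.
    as_ptr = ctypes.cast(ctypes.pointer(slot),
                         ctypes.POINTER(ctypes.c_void_p)).contents.value

    assert as_int == as_ptr == ctypes.addressof(x)

The test updates below show the same rewrite at the IR level: the integer
load becomes an i8* load, inttoptr users become bitcasts, and any remaining
integer users get an explicit ptrtoint of the new pointer load.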
diff --git a/llvm/test/Transforms/InstCombine/multi-use-load-casts.ll b/llvm/test/Transforms/InstCombine/multi-use-load-casts.ll
--- a/llvm/test/Transforms/InstCombine/multi-use-load-casts.ll
+++ b/llvm/test/Transforms/InstCombine/multi-use-load-casts.ll
@@ -7,7 +7,8 @@
 define void @t0(i1 zeroext %c0, i1 zeroext %c1, i64* nocapture readonly %src) {
 ; CHECK-LABEL: @t0(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[DATA:%.*]] = load i64, i64* [[SRC:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[SRC:%.*]] to i8**
+; CHECK-NEXT:    [[DATADATA_PTR:%.*]] = load i8*, i8** [[TMP0]], align 8
 ; CHECK-NEXT:    br i1 [[C0:%.*]], label [[BB3:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB4:%.*]], label [[BB5:%.*]]
@@ -15,12 +16,12 @@
 ; CHECK-NEXT:    tail call void @abort()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       bb5:
-; CHECK-NEXT:    [[PTR0:%.*]] = inttoptr i64 [[DATA]] to i32*
+; CHECK-NEXT:    [[PTR0:%.*]] = bitcast i8* [[DATADATA_PTR]] to i32*
 ; CHECK-NEXT:    tail call void @sink0(i32* [[PTR0]])
 ; CHECK-NEXT:    br label [[BB9:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    [[PTR1:%.*]] = inttoptr i64 [[DATA]] to i32*
-; CHECK-NEXT:    tail call void @sink1(i32* [[PTR1]])
+; CHECK-NEXT:    [[PTR11:%.*]] = bitcast i8* [[DATADATA_PTR]] to i32*
+; CHECK-NEXT:    tail call void @sink1(i32* [[PTR11]])
 ; CHECK-NEXT:    br label [[BB9]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    ret void
@@ -54,7 +55,9 @@
 define void @n1(i1 zeroext %c0, i1 zeroext %c1, i64* nocapture readonly %src) {
 ; CHECK-LABEL: @n1(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[DATA:%.*]] = load i64, i64* [[SRC:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[SRC:%.*]] to i8**
+; CHECK-NEXT:    [[DATADATA_PTR:%.*]] = load i8*, i8** [[TMP0]], align 8
+; CHECK-NEXT:    [[PTR01:%.*]] = bitcast i8* [[DATADATA_PTR]] to i32*
 ; CHECK-NEXT:    br i1 [[C0:%.*]], label [[BB3:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB4:%.*]], label [[BB5:%.*]]
@@ -62,11 +65,11 @@
 ; CHECK-NEXT:    tail call void @abort()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       bb5:
-; CHECK-NEXT:    [[PTR0:%.*]] = inttoptr i64 [[DATA]] to i32*
-; CHECK-NEXT:    tail call void @sink0(i32* [[PTR0]])
+; CHECK-NEXT:    tail call void @sink0(i32* [[PTR01]])
 ; CHECK-NEXT:    br label [[BB9:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    [[VEC:%.*]] = bitcast i64 [[DATA]] to <2 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i8* [[DATADATA_PTR]] to i64
+; CHECK-NEXT:    [[VEC:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    tail call void @sink2(<2 x i32> [[VEC]])
 ; CHECK-NEXT:    br label [[BB9]]
 ; CHECK:       bb9:
@@ -101,7 +104,9 @@
 define void @n2(i1 zeroext %c0, i1 zeroext %c1, i64* nocapture readonly %src) {
 ; CHECK-LABEL: @n2(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[DATA:%.*]] = load i64, i64* [[SRC:%.*]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[SRC:%.*]] to i8**
+; CHECK-NEXT:    [[DATADATA_PTR:%.*]] = load i8*, i8** [[TMP0]], align 8
+; CHECK-NEXT:    [[PTR01:%.*]] = bitcast i8* [[DATADATA_PTR]] to i32*
 ; CHECK-NEXT:    br i1 [[C0:%.*]], label [[BB3:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[BB4:%.*]], label [[BB5:%.*]]
@@ -109,11 +114,11 @@
 ; CHECK-NEXT:    tail call void @abort()
 ; CHECK-NEXT:    unreachable
 ; CHECK:       bb5:
-; CHECK-NEXT:    [[PTR0:%.*]] = inttoptr i64 [[DATA]] to i32*
-; CHECK-NEXT:    tail call void @sink0(i32* [[PTR0]])
+; CHECK-NEXT:    tail call void @sink0(i32* [[PTR01]])
 ; CHECK-NEXT:    br label [[BB9:%.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    tail call void @sink3(i64 [[DATA]])
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i8* [[DATADATA_PTR]] to i64
+; CHECK-NEXT:    tail call void @sink3(i64 [[TMP1]])
 ; CHECK-NEXT:    br label [[BB9]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -1141,10 +1141,10 @@
 ; CHECK-NEXT:    [[X1:%.*]] = bitcast i8** [[X]] to i64*
 ; CHECK-NEXT:    call void @scribble_on_i64(i64* nonnull [[X1]])
 ; CHECK-NEXT:    call void @scribble_on_i64(i64* nonnull [[TMPCAST]])
-; CHECK-NEXT:    [[TMP:%.*]] = load i64, i64* [[X1]], align 8
-; CHECK-NEXT:    store i64 [[TMP]], i64* [[TMPCAST]], align 8
-; CHECK-NEXT:    [[V:%.*]] = inttoptr i64 [[TMP]] to i8*
-; CHECK-NEXT:    ret i8* [[V]]
+; CHECK-NEXT:    [[TMPTMP_PTR:%.*]] = load i8*, i8** [[X]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i8* [[TMPTMP_PTR]] to i64
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[TMPCAST]], align 8
+; CHECK-NEXT:    ret i8* [[TMPTMP_PTR]]
 ;
   %x = alloca i8*
   %y = alloca i64
diff --git a/llvm/test/Transforms/InstCombine/volatile_load_cast.ll b/llvm/test/Transforms/InstCombine/volatile_load_cast.ll
--- a/llvm/test/Transforms/InstCombine/volatile_load_cast.ll
+++ b/llvm/test/Transforms/InstCombine/volatile_load_cast.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ; Ensure that volatile loads followed by a bitcast don't get transformed into a
@@ -10,9 +11,10 @@
 
 define float @float_load(i32* %addr) {
 ; CHECK-LABEL: @float_load(
-; CHECK: %i32 = load volatile i32, i32* %addr, align 4
-; CHECK-NEXT: %float = bitcast i32 %i32 to float
-; CHECK-NEXT: ret float %float
+; CHECK-NEXT:    [[I32:%.*]] = load volatile i32, i32* [[ADDR:%.*]], align 4
+; CHECK-NEXT:    [[FLOAT:%.*]] = bitcast i32 [[I32]] to float
+; CHECK-NEXT:    ret float [[FLOAT]]
+;
   %i32 = load volatile i32, i32* %addr, align 4
   %float = bitcast i32 %i32 to float
   ret float %float
@@ -20,9 +22,10 @@
 
 define i32 @i32_load(float* %addr) {
 ; CHECK-LABEL: @i32_load(
-; CHECK: %float = load volatile float, float* %addr, align 4
-; CHECK-NEXT: %i32 = bitcast float %float to i32
-; CHECK-NEXT: ret i32 %i32
+; CHECK-NEXT:    [[FLOAT:%.*]] = load volatile float, float* [[ADDR:%.*]], align 4
+; CHECK-NEXT:    [[I32:%.*]] = bitcast float [[FLOAT]] to i32
+; CHECK-NEXT:    ret i32 [[I32]]
+;
   %float = load volatile float, float* %addr, align 4
   %i32 = bitcast float %float to i32
   ret i32 %i32
@@ -30,9 +33,10 @@
 
 define double @double_load(i64* %addr) {
 ; CHECK-LABEL: @double_load(
-; CHECK: %i64 = load volatile i64, i64* %addr, align 8
-; CHECK-NEXT: %double = bitcast i64 %i64 to double
-; CHECK-NEXT: ret double %double
+; CHECK-NEXT:    [[I64:%.*]] = load volatile i64, i64* [[ADDR:%.*]], align 8
+; CHECK-NEXT:    [[DOUBLE:%.*]] = bitcast i64 [[I64]] to double
+; CHECK-NEXT:    ret double [[DOUBLE]]
+;
   %i64 = load volatile i64, i64* %addr, align 8
   %double = bitcast i64 %i64 to double
   ret double %double
@@ -40,9 +44,10 @@
 
 define i64 @i64_load(double* %addr) {
 ; CHECK-LABEL: @i64_load(
-; CHECK: %double = load volatile double, double* %addr, align 8
-; CHECK-NEXT: %i64 = bitcast double %double to i64
-; CHECK-NEXT: ret i64 %i64
+; CHECK-NEXT:    [[DOUBLE:%.*]] = load volatile double, double* [[ADDR:%.*]], align 8
+; CHECK-NEXT:    [[I64:%.*]] = bitcast double [[DOUBLE]] to i64
+; CHECK-NEXT:    ret i64 [[I64]]
+;
   %double = load volatile double, double* %addr, align 8
   %i64 = bitcast double %double to i64
   ret i64 %i64
@@ -50,9 +55,10 @@
 
 define i8* @ptr_load(i64* %addr) {
 ; CHECK-LABEL: @ptr_load(
-; CHECK: %i64 = load volatile i64, i64* %addr, align 8
-; CHECK-NEXT: %ptr = inttoptr i64 %i64 to i8*
-; CHECK-NEXT: ret i8* %ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[ADDR:%.*]] to i8**
+; CHECK-NEXT:    [[I64I64_PTR:%.*]] = load volatile i8*, i8** [[TMP1]], align 8
+; CHECK-NEXT:    ret i8* [[I64I64_PTR]]
+;
   %i64 = load volatile i64, i64* %addr, align 8
   %ptr = inttoptr i64 %i64 to i8*
   ret i8* %ptr
diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
--- a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
+++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
@@ -68,13 +68,14 @@
 ; CHECK-LABEL: @_Z3foo1S(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I2:%.*]] = alloca [[TMP0:%.*]], align 8
-; CHECK-NEXT:    [[I1_SROA_0_0_I5_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I5_SROA_CAST]], align 8
+; CHECK-NEXT:    [[TMP0]] = bitcast %0* [[ARG:%.*]] to i8**
+; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOADI1_SROA_0_0_COPYLOAD_PTR:%.*]] = load i8*, i8** [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[I1_SROA_0_0_COPYLOADI1_SROA_0_0_COPYLOAD_PTR]] to i32*
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i8* [[I1_SROA_0_0_COPYLOADI1_SROA_0_0_COPYLOAD_PTR]] to i64
 ; CHECK-NEXT:    [[I_SROA_0_0_I6_SROA_CAST:%.*]] = bitcast %0* [[I2]] to i64*
-; CHECK-NEXT:    store i64 [[I1_SROA_0_0_COPYLOAD]], i64* [[I_SROA_0_0_I6_SROA_CAST]], align 8
+; CHECK-NEXT:    store i64 [[TMP2]], i64* [[I_SROA_0_0_I6_SROA_CAST]], align 8
 ; CHECK-NEXT:    tail call void @_Z7escape01S(%0* nonnull byval(%0) align 8 [[I2]])
-; CHECK-NEXT:    [[TMP0]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    ret i32* [[TMP0]]
+; CHECK-NEXT:    ret i32* [[TMP1]]
 ;
 bb:
   %i = alloca %0, align 8
@@ -108,23 +109,23 @@
 define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) {
 ; CHECK-LABEL: @_Z3bar1S(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[I1_SROA_0_0_I4_SROA_CAST:%.*]] = bitcast %0* [[ARG:%.*]] to i64*
-; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[I1_SROA_0_0_I4_SROA_CAST]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast %0* [[ARG:%.*]] to i8**
+; CHECK-NEXT:    [[I1_SROA_0_0_COPYLOADI1_SROA_0_0_COPYLOAD_PTR:%.*]] = load i8*, i8** [[TMP0]], align 8
 ; CHECK-NEXT:    [[I5:%.*]] = tail call i32 @_Z4condv()
 ; CHECK-NEXT:    [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0
 ; CHECK-NEXT:    br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]]
 ; CHECK:       bb7:
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[I1_SROA_0_0_COPYLOADI1_SROA_0_0_COPYLOAD_PTR]] to i32*
 ; CHECK-NEXT:    tail call void @_Z5sync0v()
-; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    tail call void @_Z7escape0Pi(i32* [[TMP0]])
+; CHECK-NEXT:    tail call void @_Z7escape0Pi(i32* [[TMP1]])
 ; CHECK-NEXT:    br label [[BB13:%.*]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    tail call void @_Z5sync1v()
-; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[I1_SROA_0_0_COPYLOAD]] to i32*
-; CHECK-NEXT:    tail call void @_Z7escape1Pi(i32* [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[I1_SROA_0_0_COPYLOADI1_SROA_0_0_COPYLOAD_PTR]] to i32*
+; CHECK-NEXT:    tail call void @_Z7escape1Pi(i32* [[TMP2]])
 ; CHECK-NEXT:    br label [[BB13]]
 ; CHECK:       bb13:
-; CHECK-NEXT:    [[DOTPRE_PHI:%.*]] = phi i32* [ [[TMP1]], [[BB10]] ], [ [[TMP0]], [[BB7]] ]
+; CHECK-NEXT:    [[DOTPRE_PHI:%.*]] = phi i32* [ [[TMP2]], [[BB10]] ], [ [[TMP1]], [[BB7]] ]
 ; CHECK-NEXT:    ret i32* [[DOTPRE_PHI]]
 ;
 bb:
diff --git a/llvm/utils/compare-stats.py b/llvm/utils/compare-stats.py
new file mode 100755
--- /dev/null
+++ b/llvm/utils/compare-stats.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+'''
+Compare two .stats files generated by LLVM tools.
+
+compare-stats.py takes as arguments two stats files to compare
+and outputs the result as CSV on stdout.
+
+Usage:
+    compare-stats.py total-old.stats total-new.stats
+'''
+
+import collections
+import json
+import sys
+
+result = {}
+
+def tassign(tup, index, val):
+    # Return a copy of tuple `tup` with the element at `index` set to `val`.
+    return tup[:index] + (val,) + tup[index + 1:]
+
+for arg in range(1, len(sys.argv)):
+    with open(sys.argv[arg], "r", encoding='utf-8',
+              errors='ignore') as f:
+        text = f.read()
+    try:
+        data = json.loads(text)
+    except ValueError:
+        print('ignored %s: failed to parse' % sys.argv[arg], file=sys.stderr)
+        continue
+    for key in data:
+        if key not in result:
+            result[key] = (0, 0)
+        result[key] = tassign(result[key], arg - 1, data[key])
+
+print("statistic name,baseline,proposed,Δ,%,|%|")
+for k, v in collections.OrderedDict(sorted(result.items())).items():
+    # Ignore unchanged stats.
+    if v[0] == v[1]:
+        continue
+    delta = v[1] - v[0]
+    reldiff = 0
+    absreldiff = 0
+    if v[0]:
+        reldiff = delta / v[0]
+        absreldiff = abs(reldiff)
+    print('{},{:d},{:d},{:+d},{:+.04f},{:.04f}'.format(
+        k, v[0], v[1], delta, reldiff, absreldiff))
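Since a .stats file is a flat JSON dictionary mapping statistic names to
integer counters, the script can be smoke-tested end to end. A hypothetical
example (the statistic names and the repository-relative script path are
illustrative, not taken from the patch):

    # Hypothetical smoke test; the path and statistic names are illustrative.
    import json
    import os
    import subprocess
    import tempfile

    baseline = {'instcombine.NumCombined': 100, 'instcombine.NumDeadInst': 7}
    proposed = {'instcombine.NumCombined': 120, 'instcombine.NumDeadInst': 7}

    with tempfile.TemporaryDirectory() as d:
        old = os.path.join(d, 'total-old.stats')
        new = os.path.join(d, 'total-new.stats')
        with open(old, 'w') as f:
            json.dump(baseline, f)
        with open(new, 'w') as f:
            json.dump(proposed, f)
        # Prints the CSV header followed only by the changed statistic:
        # instcombine.NumCombined,100,120,+20,+0.2000,0.2000
        subprocess.run(['llvm/utils/compare-stats.py', old, new], check=True)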