Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -681,6 +681,34 @@
   return nullptr;
 }
 
+static Instruction *narrowLoad(TruncInst &Trunc,
+                               InstCombiner::BuilderTy &Builder,
+                               const DataLayout &DL) {
+  // Check the layout to ensure we are not creating an unsupported operation.
+  // TODO: Create a GEP to offset the load?
+  if (!DL.isLittleEndian())
+    return nullptr;
+  unsigned NarrowBitWidth = Trunc.getDestTy()->getPrimitiveSizeInBits();
+  if (!DL.isLegalInteger(NarrowBitWidth))
+    return nullptr;
+
+  // Match a truncated load with no other uses.
+  Value *X;
+  if (!match(Trunc.getOperand(0), m_OneUse(m_Load(m_Value(X)))))
+    return nullptr;
+  LoadInst *WideLoad = cast<LoadInst>(Trunc.getOperand(0));
+  if (!WideLoad->isSimple())
+    return nullptr;
+
+  // trunc (load X) --> load (bitcast X)
+  PointerType *PtrTy = PointerType::get(Trunc.getDestTy(),
+                                        WideLoad->getPointerAddressSpace());
+  Value *Bitcast = Builder.CreateBitCast(X, PtrTy);
+  LoadInst *NarrowLoad = new LoadInst(Bitcast);
+  NarrowLoad->setAlignment(WideLoad->getAlignment());
+  return NarrowLoad;
+}
+
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
@@ -840,6 +868,9 @@
   if (Instruction *I = foldVecTruncToExtElt(CI, *this))
     return I;
 
+  if (Instruction *NewLoad = narrowLoad(CI, Builder, DL))
+    return NewLoad;
+
   return nullptr;
 }
 
Index: llvm/test/Transforms/InstCombine/trunc-load.ll
===================================================================
--- llvm/test/Transforms/InstCombine/trunc-load.ll
+++ llvm/test/Transforms/InstCombine/trunc-load.ll
@@ -2,28 +2,44 @@
 ; RUN: opt < %s -instcombine -S -data-layout="e-n16:32:64" | FileCheck %s --check-prefixes=CHECK,LE
 ; RUN: opt < %s -instcombine -S -data-layout="E-n16:32:64" | FileCheck %s --check-prefixes=CHECK,BE
 
+; On little-endian, we can narrow the load without an offset.
+
 define i32 @truncload(i64* %ptr) {
-; CHECK-LABEL: @truncload(
-; CHECK-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; LE-LABEL: @truncload(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload(
+; BE-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
 ;
   %x = load i64, i64* %ptr
   %r = trunc i64 %x to i32
   ret i32 %r
 }
 
+; Preserve alignment.
+
 define i16 @truncload_align(i32* %ptr) {
-; CHECK-LABEL: @truncload_align(
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i16
-; CHECK-NEXT:    ret i16 [[R]]
+; LE-LABEL: @truncload_align(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to i16*
+; LE-NEXT:    [[R:%.*]] = load i16, i16* [[TMP1]], align 16
+; LE-NEXT:    ret i16 [[R]]
+;
+; BE-LABEL: @truncload_align(
+; BE-NEXT:    [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
+; BE-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i16
+; BE-NEXT:    ret i16 [[R]]
 ;
   %x = load i32, i32* %ptr, align 16
   %r = trunc i32 %x to i16
   ret i16 %r
 }
 
+; Negative test - extra use means we would not eliminate the original load.
+
 declare void @use(i64)
 
 define i32 @truncload_extra_use(i64* %ptr) {
@@ -39,6 +55,8 @@
   ret i32 %r
 }
 
+; Negative test - don't create a load if the type is not allowed by the data-layout.
+
 define i8 @truncload_type(i64* %ptr) {
 ; CHECK-LABEL: @truncload_type(
 ; CHECK-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 2
@@ -50,6 +68,8 @@
   ret i8 %r
 }
 
+; Negative test - don't transform volatiles.
+
 define i32 @truncload_volatile(i64* %ptr) {
 ; CHECK-LABEL: @truncload_volatile(
 ; CHECK-NEXT:    [[X:%.*]] = load volatile i64, i64* [[PTR:%.*]], align 8
@@ -61,11 +81,18 @@
   ret i32 %r
 }
 
+; Preserve address space.
+
 define i32 @truncload_address_space(i64 addrspace(1)* %ptr) {
-; CHECK-LABEL: @truncload_address_space(
-; CHECK-NEXT:    [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; LE-LABEL: @truncload_address_space(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
+; LE-NEXT:    [[R:%.*]] = load i32, i32 addrspace(1)* [[TMP1]], align 4
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_address_space(
+; BE-NEXT:    [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
 ;
   %x = load i64, i64 addrspace(1)* %ptr, align 4
   %r = trunc i64 %x to i32