Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -681,6 +681,48 @@
   return nullptr;
 }
 
+/// Try to fold a truncate of a load into a single narrower load from the same
+/// address:
+///   trunc (load X) --> load (bitcast X)
+/// Returns the newly created narrow load, or nullptr if any precondition
+/// below does not hold.
+static Instruction *narrowLoad(TruncInst &Trunc,
+                               InstCombiner::BuilderTy &Builder,
+                               const DataLayout &DL) {
+  // Check the layout to ensure we are not creating an unsupported operation.
+  // TODO: Create a GEP to offset the load?
+  if (!DL.isLittleEndian())
+    return nullptr;
+
+  // Only scalar integer results are handled. For a vector destination,
+  // getPrimitiveSizeInBits() is the width of the whole vector, and a narrow
+  // vector load from the same address would read the wrong bytes for every
+  // element after the first.
+  if (!Trunc.getDestTy()->isIntegerTy())
+    return nullptr;
+  unsigned NarrowBitWidth = Trunc.getDestTy()->getPrimitiveSizeInBits();
+  if (!DL.isLegalInteger(NarrowBitWidth))
+    return nullptr;
+
+  // Match a truncated load with no other uses.
+  Value *X;
+  if (!match(Trunc.getOperand(0), m_OneUse(m_Load(m_Value(X)))))
+    return nullptr;
+  LoadInst *WideLoad = cast<LoadInst>(Trunc.getOperand(0));
+  if (!WideLoad->isSimple())
+    return nullptr;
+
+  // Don't narrow this load if we would lose information about the
+  // dereferenceable range.
+  bool CanBeNull;
+  uint64_t DerefBits = X->getPointerDereferenceableBytes(DL, CanBeNull) * 8;
+  if (DerefBits < WideLoad->getType()->getPrimitiveSizeInBits())
+    return nullptr;
+
+  // trunc (load X) --> load (bitcast X)
+  PointerType *PtrTy = PointerType::get(Trunc.getDestTy(),
+                                        WideLoad->getPointerAddressSpace());
+  Value *Bitcast = Builder.CreatePointerCast(X, PtrTy);
+  LoadInst *NarrowLoad = new LoadInst(Trunc.getDestTy(), Bitcast);
+  NarrowLoad->setAlignment(WideLoad->getAlignment());
+  copyMetadataForLoad(*NarrowLoad, *WideLoad);
+  return NarrowLoad;
+}
+
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
@@ -840,6 +882,9 @@
   if (Instruction *I = foldVecTruncToExtElt(CI, *this))
     return I;
 
+  if (Instruction *NewLoad = narrowLoad(CI, Builder, DL))
+    return NewLoad;
+
   return nullptr;
 }
 
Index: llvm/trunk/test/Transforms/InstCombine/trunc-load.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/trunc-load.ll
+++ llvm/trunk/test/Transforms/InstCombine/trunc-load.ll
@@ -29,10 +29,15 @@
 ; On little-endian, we can narrow the load without an offset.
 
 define i32 @truncload_deref(i64* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @truncload_deref(
-; CHECK-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; LE-LABEL: @truncload_deref(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_deref(
+; BE-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
 ;
   %x = load i64, i64* %ptr
   %r = trunc i64 %x to i32
@@ -42,10 +47,15 @@
 ; Preserve alignment.
 
 define i16 @truncload_align(i32* dereferenceable(14) %ptr) {
-; CHECK-LABEL: @truncload_align(
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
-; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i16
-; CHECK-NEXT:    ret i16 [[R]]
+; LE-LABEL: @truncload_align(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to i16*
+; LE-NEXT:    [[R:%.*]] = load i16, i16* [[TMP1]], align 16
+; LE-NEXT:    ret i16 [[R]]
+;
+; BE-LABEL: @truncload_align(
+; BE-NEXT:    [[X:%.*]] = load i32, i32* [[PTR:%.*]], align 16
+; BE-NEXT:    [[R:%.*]] = trunc i32 [[X]] to i16
+; BE-NEXT:    ret i16 [[R]]
 ;
   %x = load i32, i32* %ptr, align 16
   %r = trunc i32 %x to i16
@@ -98,12 +108,40 @@
 ; Preserve address space.
 
 define i32 @truncload_address_space(i64 addrspace(1)* dereferenceable(8) %ptr) {
-; CHECK-LABEL: @truncload_address_space(
-; CHECK-NEXT:    [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; LE-LABEL: @truncload_address_space(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64 addrspace(1)* [[PTR:%.*]] to i32 addrspace(1)*
+; LE-NEXT:    [[R:%.*]] = load i32, i32 addrspace(1)* [[TMP1]], align 4
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_address_space(
+; BE-NEXT:    [[X:%.*]] = load i64, i64 addrspace(1)* [[PTR:%.*]], align 4
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
 ;
   %x = load i64, i64 addrspace(1)* %ptr, align 4
   %r = trunc i64 %x to i32
   ret i32 %r
 }
+
+; Most metadata should be transferred to the narrow load.
+; TODO: We lost the range.
+
+define i32 @truncload_metadata(i64* dereferenceable(8) %ptr) {
+; LE-LABEL: @truncload_metadata(
+; LE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[PTR:%.*]] to i32*
+; LE-NEXT:    [[R:%.*]] = load i32, i32* [[TMP1]], align 4, !invariant.load !0, !nontemporal !1
+; LE-NEXT:    ret i32 [[R]]
+;
+; BE-LABEL: @truncload_metadata(
+; BE-NEXT:    [[X:%.*]] = load i64, i64* [[PTR:%.*]], align 4, !range !0, !invariant.load !1, !nontemporal !2
+; BE-NEXT:    [[R:%.*]] = trunc i64 [[X]] to i32
+; BE-NEXT:    ret i32 [[R]]
+;
+  %x = load i64, i64* %ptr, align 4, !invariant.load !0, !nontemporal !1, !range !2
+  %r = trunc i64 %x to i32
+  ret i32 %r
+}
+
+!0 = !{}
+!1 = !{i32 1}
+!2 = !{i64 0, i64 2}