Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -419,6 +419,15 @@ SmallVectorImpl<SDValue> &Chains, EVT Ty) const; + /// This is a helper function for visitAND and visitZERO_EXTEND. Returns + /// true if the (and (load) C) pattern matches an extload. ExtVT returns + /// the type of the loaded value to be extended. LoadedVT returns the type + /// of the original loaded value. NarrowLoad returns whether the load would + /// need to be narrowed in order to match. + bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, + EVT &ExtVT, EVT &LoadedVT, bool &NarrowLoad); + /// This is a helper function for MergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store. @@ -2977,6 +2986,38 @@ return SDValue(); } +bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, + EVT &ExtVT, EVT &LoadedVT, bool &NarrowLoad){ + uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); + + if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) + return false; + + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + LoadedVT = LoadN->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { + NarrowLoad = false; + return true; + } + + // Do not change the width of a volatile load. + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized).
+ if (!LoadN->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT)) && + TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) { + NarrowLoad = true; + return true; + } + + return false; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3169,16 +3210,12 @@ : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { - uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); - if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT LoadedVT = LN0->getMemoryVT(); - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - - if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { - + auto NarrowLoad = false; + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, + ExtVT, LoadedVT, NarrowLoad)) { + if (!NarrowLoad) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, @@ -3186,15 +3223,7 @@ AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - - // Do not change the width of a volatile load. - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized).
- if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT)) && - TLI.shouldReduceLoadWidth(LN0, ISD::ZEXTLOAD, ExtVT)) { + } else { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -6334,6 +6363,8 @@ // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) + // Unless (and (load x) cst) will match as a zextload already and has + // additional users. if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && @@ -6344,9 +6375,20 @@ if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, - SetCCs, TLI); + if (!N0.hasOneUse()) { + if (N0.getOpcode() == ISD::AND) { + auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); + auto NarrowLoad = false; + EVT LoadResultTy = AndC->getValueType(0); + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, + ExtVT, LoadedVT, NarrowLoad)) + DoXform = false; + } + if (DoXform) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI); + } if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), Index: test/CodeGen/AArch64/free-zext.ll =================================================================== --- test/CodeGen/AArch64/free-zext.ll +++ test/CodeGen/AArch64/free-zext.ll @@ -1,7 +1,7 @@ ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i64 @test_free_zext(i8* %a, i16* %b) { -; CHECK-LABEL: test_free_zext +; CHECK-LABEL: test_free_zext: ; CHECK-DAG: ldrb w[[A:[0-9]+]], [x0] ; CHECK: ldrh w[[B:[0-9]+]], [x1] ; CHECK: add x0, x[[B]], x[[A]] @@ -12,3 +12,17 @@ %add = add nsw i64 %conv1, %conv ret i64 %add } + +define void @test_free_zext2(i32* %ptr, i32* %dst1, i64* %dst2) { +; CHECK-LABEL: test_free_zext2: +; CHECK: ldrh w[[A:[0-9]+]], [x0] +; CHECK-NOT: and x +; CHECK: str w[[A]], [x1] +; CHECK: str x[[A]], [x2] + %load = load i32, i32* %ptr, align 8 + %load16 = and i32 %load, 65535 + %load64 = zext i32 %load16 to i64 + store i32 %load16, i32* %dst1, align 4 + store i64 %load64, i64* %dst2, align 8 + ret void +}