Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -426,6 +426,7 @@
                                        unsigned HiOp);
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     SDValue CombineExtLoad(SDNode *N);
+    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
     SDValue combineRepeatedFPDivisors(SDNode *N);
     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
@@ -7470,6 +7471,91 @@
   return SDValue(N, 0); // Return N so it doesn't get rechecked!
 }
 
+/// Try to fold a zero-extend of a constant-masked, constant-shifted load into
+/// the same operations performed on a zero-extending load:
+///
+///   (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+///   (and/or/xor (shl/shr (zextload x), cst), (zext cst))
+///
+/// \param N the ISD::ZERO_EXTEND node to combine.
+/// \returns SDValue(N, 0) if the fold was performed; otherwise an empty
+/// SDValue (pattern mismatch or a failed legality/use check).
+SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
+  assert(N->getOpcode() == ISD::ZERO_EXTEND);
+  EVT VT = N->getValueType(0);
+
+  // The zext operand must be and/or/xor with a constant right-hand side.
+  SDValue N0 = N->getOperand(0);
+  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+        N0.getOpcode() == ISD::XOR) ||
+      N0.getOperand(1).getOpcode() != ISD::Constant ||
+      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
+    return SDValue();
+
+  // The logic op's left-hand side must be shl/srl by a constant amount.
+  SDValue N1 = N0.getOperand(0);
+  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
+      N1.getOperand(1).getOpcode() != ISD::Constant ||
+      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
+    return SDValue();
+
+  // The shifted value must be a non-indexed load that is not already a
+  // sign-extending load, and the target must support a zextload to VT.
+  auto *Load = dyn_cast<LoadSDNode>(N1.getOperand(0));
+  if (!Load)
+    return SDValue();
+  EVT MemVT = Load->getMemoryVT();
+  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
+      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
+    return SDValue();
+
+  // If the shift op is SHL, the logic op must be AND, otherwise the result
+  // will be wrong: bits shifted out of the narrow type would survive in VT
+  // unless the zero-extended mask clears them.
+  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
+    return SDValue();
+
+  if (!N0.hasOneUse() || !N1.hasOneUse())
+    return SDValue();
+
+  SmallVector<SDNode *, 4> SetCCs;
+  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
+                               ISD::ZERO_EXTEND, SetCCs, TLI))
+    return SDValue();
+
+  // Actually do the transformation.
+  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
+                                   Load->getChain(), Load->getBasePtr(), MemVT,
+                                   Load->getMemOperand());
+
+  // Reuse the original shift-amount constant instead of rebuilding it in a
+  // widened type: extending the shifted operand does not change the amount.
+  SDLoc DL1(N1);
+  SDValue Shift =
+      DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, N1.getOperand(1));
+
+  // The mask is a value operand, so it is zero-extended to the new type.
+  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+  Mask = Mask.zext(VT.getSizeInBits());
+  SDLoc DL0(N0);
+  SDValue LogicOp = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
+                                DAG.getConstant(Mask, DL0, VT));
+
+  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, SDLoc(Load),
+                  ISD::ZERO_EXTEND);
+  CombineTo(N, LogicOp);
+  if (SDValue(Load, 0).hasOneUse()) {
+    // The loaded value has a single use, so only the chain is redirected.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
+  } else {
+    // Other users of the loaded value get a truncate of the extended load.
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
+                                Load->getValueType(0), ExtLoad);
+    CombineTo(Load, Trunc, ExtLoad.getValue(1));
+  }
+  return SDValue(N, 0); // Return N so it doesn't get rechecked!
+}
+
 /// If we're narrowing or widening the result of a vector select and the final
 /// size is the same size as a setcc (compare) feeding the select, then try to
 /// apply the cast operation to the select's operands because matching vector
@@ -7988,6 +8074,11 @@
     }
   }
 
+  // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
+  //      (and/or/xor (shl/shr (zextload x), cst), (zext cst))
+  if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
+    return ZExtLoad;
+
   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
Index: test/CodeGen/AArch64/zext-logic-shift-load.ll
===================================================================
--- test/CodeGen/AArch64/zext-logic-shift-load.ll
+++ test/CodeGen/AArch64/zext-logic-shift-load.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=aarch64-linux-gnu < %s -o - | FileCheck %s
+
+define i32 @test1(i8* %p) {
+; CHECK: ldrb
+; CHECK-NEXT: ubfx
+; CHECK-NEXT: ret
+
+  %1 = load i8, i8* %p
+  %2 = lshr i8 %1, 1
+  %3 = and i8 %2, 1
+  %4 = zext i8 %3 to i32
+  ret i32 %4
+}
+
Index: test/CodeGen/ARM/zext-logic-shift-load.ll
===================================================================
--- test/CodeGen/ARM/zext-logic-shift-load.ll
+++ test/CodeGen/ARM/zext-logic-shift-load.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=armv7-linux-gnu < %s -o - | FileCheck %s
+
+define void @test1(i8* %p, i16* %q) {
+; CHECK: ldrb
+; CHECK-NEXT: mov
+; CHECK-NEXT: and
+; CHECK-NEXT: strh
+; CHECK-NEXT: bx
+
+  %1 = load i8, i8* %p
+  %2 = shl i8 %1, 2
+  %3 = and i8 %2, 12
+  %4 = zext i8 %3 to i16
+  store i16 %4, i16* %q
+  ret void
+}
+
Index: test/CodeGen/X86/zext-logicop-shift-load.ll
===================================================================
--- test/CodeGen/X86/zext-logicop-shift-load.ll
+++ test/CodeGen/X86/zext-logicop-shift-load.ll
@@ -0,0 +1,122 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+
+define i64 @test1(i8* %data) {
+; CHECK-LABEL: test1:
+; CHECK:       movzbl
+; CHECK-NEXT:  shlq
+; CHECK-NEXT:  andl
+; CHECK-NEXT:  retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = shl i8 %bf.load, 2
+  %0 = and i8 %bf.clear, 60
+  %mul = zext i8 %0 to i64
+  ret i64 %mul
+}
+
+define i8* @test2(i8* %data) {
+; CHECK-LABEL: test2:
+; CHECK:       movzbl
+; CHECK-NEXT:  andl
+; CHECK-NEXT:  leaq
+; CHECK-NEXT:  retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = shl i8 %bf.load, 2
+  %0 = and i8 %bf.clear, 60
+  %mul = zext i8 %0 to i64
+  %add.ptr = getelementptr inbounds i8, i8* %data, i64 %mul
+  ret i8* %add.ptr
+}
+
+; If the shift op is SHL, the logic op can only be AND.
+define i64 @test3(i8* %data) {
+; CHECK-LABEL: test3:
+; CHECK:       movb
+; CHECK-NEXT:  shlb
+; CHECK-NEXT:  xorb
+; CHECK-NEXT:  movzbl
+; CHECK-NEXT:  retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = shl i8 %bf.load, 2
+  %0 = xor i8 %bf.clear, 60
+  %mul = zext i8 %0 to i64
+  ret i64 %mul
+}
+
+define i64 @test4(i8* %data) {
+; CHECK-LABEL: test4:
+; CHECK:       movzbl
+; CHECK-NEXT:  shrq
+; CHECK-NEXT:  andl
+; CHECK-NEXT:  retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = lshr i8 %bf.load, 2
+  %0 = and i8 %bf.clear, 60
+  %1 = zext i8 %0 to i64
+  ret i64 %1
+}
+
+define i64 @test5(i8* %data) {
+; CHECK-LABEL: test5:
+; CHECK:       movzbl
+; CHECK-NEXT:  shrq
+; CHECK-NEXT:  xorq
+; CHECK-NEXT:  retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = lshr i8 %bf.load, 2
+  %0 = xor i8 %bf.clear, 60
+  %1 = zext i8 %0 to i64
+  ret i64 %1
+}
+
+define i64 @test6(i8* %data) {
+; CHECK-LABEL: test6:
+; CHECK:       movzbl
+; CHECK-NEXT:  shrq
+; CHECK-NEXT:  orq
+; CHECK-NEXT:  retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = lshr i8 %bf.load, 2
+  %0 = or i8 %bf.clear, 60
+  %1 = zext i8 %0 to i64
+  ret i64 %1
+}
+
+; Don't do the folding if the other operand isn't a constant.
+define i64 @test7(i8* %data, i8 %logop) {
+; CHECK-LABEL: test7:
+; CHECK:       movb
+; CHECK-NEXT:  shrb
+; CHECK-NEXT:  orb
+; CHECK-NEXT:  movzbl
+; CHECK-NEXT:  retq
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %bf.clear = lshr i8 %bf.load, 2
+  %0 = or i8 %bf.clear, %logop
+  %1 = zext i8 %0 to i64
+  ret i64 %1
+}
+
+; Load is folded with sext.
+define i64 @test8(i8* %data) {
+; CHECK-LABEL: test8:
+; CHECK:       movsbl
+; CHECK-NEXT:  movzwl
+; CHECK-NEXT:  shrl
+; CHECK-NEXT:  orl
+entry:
+  %bf.load = load i8, i8* %data, align 4
+  %ext = sext i8 %bf.load to i16
+  %bf.clear = lshr i16 %ext, 2
+  %0 = or i16 %bf.clear, 60
+  %1 = zext i16 %0 to i64
+  ret i64 %1
+}
+