Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2527,6 +2527,7 @@
   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
   //
   // do not sink logical op inside of a vector extend, since it may combine
@@ -2534,6 +2535,7 @@
   EVT Op0VT = N0.getOperand(0).getValueType();
   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
        N0.getOpcode() == ISD::SIGN_EXTEND ||
+       N0.getOpcode() == ISD::BSWAP ||
        // Avoid infinite looping with PromoteIntBinOp.
        (N0.getOpcode() == ISD::ANY_EXTEND &&
         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
Index: lib/Transforms/InstCombine/InstCombine.h
===================================================================
--- lib/Transforms/InstCombine/InstCombine.h
+++ lib/Transforms/InstCombine/InstCombine.h
@@ -401,6 +401,7 @@
                                     APInt &UndefElts, unsigned Depth = 0);
 
   Value *SimplifyVectorOp(BinaryOperator &Inst);
+  Value *SimplifyBSwap(BinaryOperator &Inst);
 
   // FoldOpIntoPhi - Given a binary operator, cast instruction, or select
   // which has a PHI node as operand #0, see if we can fold the instruction
Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -117,6 +117,57 @@
   return Builder->CreateFCmp(Pred, LHS, RHS);
 }
 
+/// \brief Transform BITWISE_OP(BSWAP(A),BSWAP(B)) to BSWAP(BITWISE_OP(A, B))
+/// \param I Binary operator to transform.
+/// \return Pointer to node that must replace the original binary operator, or
+///         null pointer if no transformation was made.
+Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
+  IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+
+  // Can't do vectors.
+  if (I.getType()->isVectorTy()) return nullptr;
+
+  // Can only do bitwise ops.
+  unsigned Op = I.getOpcode();
+  if (Op != Instruction::And && Op != Instruction::Or &&
+      Op != Instruction::Xor)
+    return nullptr;
+
+  Value *OldLHS = I.getOperand(0);
+  Value *OldRHS = I.getOperand(1);
+  ConstantInt *ConstLHS = dyn_cast<ConstantInt>(OldLHS);
+  ConstantInt *ConstRHS = dyn_cast<ConstantInt>(OldRHS);
+  IntrinsicInst *IntrLHS = dyn_cast<IntrinsicInst>(OldLHS);
+  IntrinsicInst *IntrRHS = dyn_cast<IntrinsicInst>(OldRHS);
+  bool IsBswapLHS = (IntrLHS && IntrLHS->getIntrinsicID() == Intrinsic::bswap);
+  bool IsBswapRHS = (IntrRHS && IntrRHS->getIntrinsicID() == Intrinsic::bswap);
+
+  /// OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+  /// OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
+  if ((IsBswapLHS || IsBswapRHS) && (IsBswapLHS || ConstLHS) &&
+      (IsBswapRHS || ConstRHS)) {
+    Value *NewLHS = IsBswapLHS ? IntrLHS->getOperand(0) :
+                    Builder->getInt(ConstLHS->getValue().byteSwap());
+
+    Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) :
+                    Builder->getInt(ConstRHS->getValue().byteSwap());
+
+    Value *BinOp = nullptr;
+    if (Op == Instruction::And)
+      BinOp = Builder->CreateAnd(NewLHS, NewRHS);
+    else if (Op == Instruction::Or)
+      BinOp = Builder->CreateOr(NewLHS, NewRHS);
+    else //if (Op == Instruction::Xor)
+      BinOp = Builder->CreateXor(NewLHS, NewRHS);
+
+    Module *M = I.getParent()->getParent()->getParent();
+    Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+    return Builder->CreateCall(F, BinOp);
+  }
+
+  return nullptr;
+}
+
 // OptAndOp - This handles expressions of the form ((val OP C1) & C2).  Where
 // the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
 // guaranteed to be a binary operator.
@@ -1120,6 +1171,9 @@
   if (SimplifyDemandedInstructionBits(I))
     return &I;
 
+  if (Value *V = SimplifyBSwap(I))
+    return ReplaceInstUsesWith(I, V);
+
   if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
     const APInt &AndRHSMask = AndRHS->getValue();
 
@@ -2045,6 +2099,9 @@
   if (SimplifyDemandedInstructionBits(I))
     return &I;
 
+  if (Value *V = SimplifyBSwap(I))
+    return ReplaceInstUsesWith(I, V);
+
   if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
     ConstantInt *C1 = nullptr; Value *X = nullptr;
     // (X & C1) | C2 --> (X | C2) & (C1|C2)
@@ -2406,6 +2463,9 @@
   if (SimplifyDemandedInstructionBits(I))
     return &I;
 
+  if (Value *V = SimplifyBSwap(I))
+    return ReplaceInstUsesWith(I, V);
+
   // Is this a ~ operation?
   if (Value *NotOp = dyn_castNotVal(&I)) {
     if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(NotOp)) {
Index: test/Transforms/InstCombine/bswap-fold.ll
===================================================================
--- test/Transforms/InstCombine/bswap-fold.ll
+++ test/Transforms/InstCombine/bswap-fold.ll
@@ -12,29 +12,23 @@
   ret i1 %tmp.upgrd.1
 }
 
-declare i32 @llvm.bswap.i32(i32)
-
 define i1 @test3(i64 %tmp) {
   %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp )
   %tmp.upgrd.2 = icmp eq i64 %tmp34, 1
   ret i1 %tmp.upgrd.2
 }
 
-declare i64 @llvm.bswap.i64(i64)
-
-declare i16 @llvm.bswap.i16(i16)
-
 ; rdar://5992453
 ; A & 255
 define i32 @test4(i32 %a) nounwind {
 entry:
-  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
   %tmp4 = lshr i32 %tmp2, 24
   ret i32 %tmp4
 }
 
 ; A
-define i32 @test5(i32 %a) nounwind {
+define i32 @test5(i32 %a) nounwind {
 entry:
   %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
   %tmp4 = tail call i32 @llvm.bswap.i32( i32 %tmp2 )
@@ -42,16 +36,16 @@
 }
 
 ; a >> 24
-define i32 @test6(i32 %a) nounwind {
+define i32 @test6(i32 %a) nounwind {
 entry:
-  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
+  %tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
   %tmp4 = and i32 %tmp2, 255
   ret i32 %tmp4
 }
 
 ; PR5284
 define i16 @test7(i32 %A) {
-  %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
+  %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
   %C = trunc i32 %B to i16
   %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
   ret i16 %D
@@ -69,3 +63,91 @@
   %a = call i64 @llvm.bswap.i64(i64 undef)
   ret i64 %a
 }
+
+; PR15782
+; Fold: OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+; Fold: OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
+define i16 @bs_and16i(i16 %a, i16 %b) #0 {
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = and i16 %1, 10001
+  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+define i16 @bs_and16(i16 %a, i16 %b) #0 {
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %3 = and i16 %2, %1
+  %4 = tail call i16 @llvm.bswap.i16(i16 %3)
+  ret i16 %4
+}
+define i16 @bs_or16(i16 %a, i16 %b) #0 {
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %3 = or i16 %2, %1
+  %4 = tail call i16 @llvm.bswap.i16(i16 %3)
+  ret i16 %4
+}
+define i16 @bs_xor16(i16 %a, i16 %b) #0 {
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %3 = xor i16 %2, %1
+  %4 = tail call i16 @llvm.bswap.i16(i16 %3)
+  ret i16 %4
+}
+
+define i32 @bs_and32i(i32 %a, i32 %b) #0 {
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = and i32 %1, 100001
+  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+define i32 @bs_and32(i32 %a, i32 %b) #0 {
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %3 = and i32 %2, %1
+  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
+  ret i32 %4
+}
+define i32 @bs_or32(i32 %a, i32 %b) #0 {
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %3 = or i32 %2, %1
+  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
+  ret i32 %4
+}
+define i32 @bs_xor32(i32 %a, i32 %b) #0 {
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %3 = xor i32 %2, %1
+  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
+  ret i32 %4
+}
+
+define i64 @bs_and64i(i64 %a, i64 %b) #0 {
+  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %2 = and i64 %1, 1000000001
+  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+define i64 @bs_and64(i64 %a, i64 %b) #0 {
+  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %3 = and i64 %2, %1
+  %4 = tail call i64 @llvm.bswap.i64(i64 %3)
+  ret i64 %4
+}
+define i64 @bs_or64(i64 %a, i64 %b) #0 {
+  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %3 = or i64 %2, %1
+  %4 = tail call i64 @llvm.bswap.i64(i64 %3)
+  ret i64 %4
+}
+define i64 @bs_xor64(i64 %a, i64 %b) #0 {
+  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %3 = xor i64 %2, %1
+  %4 = tail call i64 @llvm.bswap.i64(i64 %3)
+  ret i64 %4
+}
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
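
For reference only (not part of the patch): a minimal sketch of the IR that SimplifyBSwap is expected to produce for the i16 tests above, assuming the existing bswap(bswap(x)) -> x fold (exercised by test5) also fires on the outer call. The function names @bs_and16_expected and @bs_and16i_expected are hypothetical and exist only for this illustration.

; bs_and16: and(bswap(a), bswap(b)) becomes bswap(and(a, b)), and the outer
; bswap then cancels with the newly created one (operand order may differ).
define i16 @bs_and16_expected(i16 %a, i16 %b) {
  %1 = and i16 %a, %b
  ret i16 %1
}

; bs_and16i: the constant is byte-swapped at compile time:
; 10001 = 0x2711, and bswap(0x2711) = 0x1127 = 4391.
define i16 @bs_and16i_expected(i16 %a) {
  %1 = and i16 %a, 4391
  ret i16 %1
}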