Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2220,14 +2220,18 @@ ConstantInt *C1 = nullptr, *C2 = nullptr; // (A | B) | C and A | (B | C) -> bswap if possible. + bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) || + match(Op1, m_Or(m_Value(), m_Value())); // (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible. - if (match(Op0, m_Or(m_Value(), m_Value())) || - match(Op1, m_Or(m_Value(), m_Value())) || - (match(Op0, m_LogicalShift(m_Value(), m_Value())) && - match(Op1, m_LogicalShift(m_Value(), m_Value())))) { + bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) && + match(Op1, m_LogicalShift(m_Value(), m_Value())); + // (A & B) | (C & D) -> bswap if possible. + bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) && + match(Op1, m_And(m_Value(), m_Value())); + + if (OrOfOrs || OrOfShifts || OrOfAnds) if (Instruction *BSwap = MatchBSwap(I)) return BSwap; - } // (X^C)|Y -> (X|Y)^C iff Y&C == 0 if (Op0->hasOneUse() && Index: test/CodeGen/AArch64/bswap.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/bswap.ll @@ -0,0 +1,39 @@ +; RUN: llc -march=aarch64 < %s | FileCheck %s + +; Test byte swap intrinsic lowering on AArch64 targets. 
+ +define i16 @bswap16(i16 %x) #0 { + %1 = tail call i16 @llvm.bswap.i16(i16 %x) + ret i16 %1 +; CHECK-LABEL: bswap16 +; CHECK: rev [[R0:w[0-9]+]], {{w[0-9]+}} +; CHECK-NEXT: lsr {{w[0-9]+}}, [[R0]], #16 +} + +define i32 @bswap32(i32 %x) #0 { + %1 = tail call i32 @llvm.bswap.i32(i32 %x) + ret i32 %1 +; CHECK-LABEL: bswap32 +; CHECK: rev [[R0:w[0-9]+]], [[R0]] +} + +define i48 @bswap48(i48 %x) #0 { + %1 = tail call i48 @llvm.bswap.i48(i48 %x) + ret i48 %1 +; CHECK-LABEL: bswap48 +; CHECK: rev [[R0:x[0-9]+]], {{x[0-9]+}} +; CHECK-NEXT: lsr {{x[0-9]+}}, [[R0]], #16 +} + +define i64 @bswap64(i64 %x) #0 { + %1 = tail call i64 @llvm.bswap.i64(i64 %x) + ret i64 %1 +; CHECK-LABEL: bswap64 +; CHECK: rev [[R0:x[0-9]+]], [[R0]] +; CHECK-NOT: rev +} + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i48 @llvm.bswap.i48(i48) +declare i64 @llvm.bswap.i64(i64) Index: test/CodeGen/ARM/bswap.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/bswap.ll @@ -0,0 +1,145 @@ +; RUN: llc -march=arm -mattr=+v4t < %s | FileCheck -check-prefix PRE6 %s +; RUN: llc -march=armeb -mattr=+v4t < %s | FileCheck -check-prefix PRE6BE %s +; RUN: llc -march=arm -mattr=+v5t < %s | FileCheck -check-prefix PRE6 %s +; RUN: llc -march=arm -mattr=+v5te < %s | FileCheck -check-prefix PRE6 %s +; RUN: llc -march=arm -mattr=+v6 < %s | FileCheck -check-prefix REV %s +; RUN: llc -march=armeb -mattr=+v6 < %s | FileCheck -check-prefix REVBE %s +; RUN: llc -march=arm -mattr=+v6k < %s | FileCheck -check-prefix REV %s +; RUN: llc -march=arm -mattr=+v6m < %s | FileCheck -check-prefix REV %s +; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck -check-prefix REV %s +; RUN: llc -march=arm -mattr=+v7 < %s | FileCheck -check-prefix REV %s +; RUN: llc -march=arm -mattr=+v8 < %s | FileCheck -check-prefix REV %s +; RUN: llc -march=arm -mattr=+v8.1a < %s | FileCheck -check-prefix REV %s + +;; Test byte swap intrinsic lowering on ARM targets. 
+ +;; The REV instruction only appeared in ARMv6 and later. Earlier +;; supported architectures have to open-code this intrinsic. + +define i16 @bswap16(i16 %x) #0 { + %1 = tail call i16 @llvm.bswap.i16(i16 %x) + ret i16 %1 +; PRE6-LABEL: bswap16 +;; The source register patterns are all capable of matching a new +;; register to avoid specifying allocation choices unnecessarily. +; PRE6: mov [[R1:r[0-9]+|lr]], #16711680 +; PRE6-NEXT: and [[R2:r[0-9]+|lr]], [[R1]], [[R0:r[0-9]]], lsl #8 +; PRE6-NEXT: orr [[R3:r[0-9]+|lr]], [[R2]], [[R0]], lsl #24 +; PRE6-NEXT: lsr [[R4:r[0-9]+|lr]], [[R3]], #16 + +; REV-LABEL: bswap16 +;; FIXME: It would be nice if DAG legalization was taught to not +;; promote the incoming reg to i32 in this case, so that the +;; combiner could canonicalize this to (rotr (bswap x), 16), which +;; would then get matched as REV16. +; REV-DAG: rev [[R0:r[0-9]+|lr]], {{r[0-9]+}} +; REV-DAG: lsr {{r[0-9]+}}, [[R0]], #16 +} + +define i32 @bswap32(i32 %x) #0 { + %1 = tail call i32 @llvm.bswap.i32(i32 %x) + ret i32 %1 +; PRE6-LABEL: bswap32 +; PRE6-DAG: mov [[R0:r[0-9]+|lr]], #65280 +; PRE6-NOT: DAG-BREAK! +; PRE6-DAG: and [[R0]], [[R0]], [[R1:r[0-9]+|lr]], lsr #8 +; PRE6-DAG: orr [[R0]], [[R0]], [[R1]], lsr #24 +; PRE6-DAG: mov [[R2:r[0-9]+|lr]], #16711680 +; PRE6-DAG: and [[R2]], [[R2]], [[R1]], lsl #8 +; PRE6-DAG: orr [[R1]], [[R2]], [[R1]], lsl #24 +; PRE6-NOT: DAG-BREAK! +; PRE6-DAG: orr [[R1]], [[R1]], [[R0]] + +; REV-LABEL: bswap32 +; REV: rev {{r[0-9]+|lr}}, {{r[0-9]+|lr}} +; REV-NOT: rev +} + +define i48 @bswap48(i48 %x) #0 { + %1 = tail call i48 @llvm.bswap.i48(i48 %x) + ret i48 %1 +; PRE6-LABEL: bswap48 +; PRE6-DAG: mov [[R0:r[0-9]+|lr]], #65280 +; PRE6-DAG: mov [[R1:r[0-9]+|lr]], #16711680 +; PRE6-NOT: DAG-BREAK! 
+; PRE6-DAG: and [[R0]], [[R0]], [[R2:r[0-9]+|lr]], lsr #8 +; PRE6-DAG: and [[R3:r[0-9]+|lr]], [[R1]], [[R2]], lsl #8 +; PRE6-DAG: orr [[R0]], [[R0]], [[R2]], lsr #24 +; PRE6-DAG: orr [[R2]], [[R3]], [[R2]], lsl #24 +; PRE6-DAG: orr [[R0]], [[R2]], [[R0]] +; PRE6-NOT: DAG-BREAK! +; PRE6-DAG: and [[R2]], [[R1]], [[R4:r[0-9]+|lr]], lsl #8 +; PRE6-DAG: orr [[R2]], [[R2]], [[R4]], lsl #24 +; PRE6-DAG: lsr [[R4]], [[R0]], #16 +; PRE6-DAG: lsr [[R2]], [[R2]], #16 +; PRE6-NOT: DAG-BREAK! +; PRE6-DAG: orr [[R2]], [[R2]], [[R0]], lsl #16 + +; PRE6BE-LABEL: bswap48 +; PRE6BE-DAG: mov [[R0:r[0-9]+|lr]], #65280 +; PRE6BE-DAG: mov [[R1:r[0-9]+|lr]], #16711680 +; PRE6BE-NOT: DAG-BREAK! +; PRE6BE-DAG: and [[R0]], [[R0]], [[R2:r[0-9]+|lr]], lsr #8 +; PRE6BE-DAG: and [[R3:r[0-9]+|lr]], [[R1]], [[R2:r[0-9]+|lr]], lsl #8 +; PRE6BE-DAG: orr [[R0]], [[R0]], [[R2]], lsr #24 +; PRE6BE-DAG: orr [[R2]], [[R3]], [[R2]], lsl #24 +; PRE6BE-DAG: orr [[R0]], [[R2]], [[R0]] +; PRE6BE-NOT: DAG-BREAK! +; PRE6BE-DAG: and [[R2]], [[R1]], [[R4:r[0-9]+|lr]], lsl #8 +; PRE6BE-DAG: orr [[R4]], [[R2]], [[R4]], lsl #24 +; PRE6BE-DAG: lsr [[R4]], [[R4]], #16 +; PRE6BE-DAG: lsr [[R4]], [[R0]], #16 +; PRE6BE-NOT: DAG-BREAK! +; PRE6BE-DAG: orr [[R2]], [[R4]], [[R0]], lsl #16 + +; REV-LABEL: bswap48 +; REV-DAG: rev [[R0:r[0-9]+]], [[R1:r[0-9]+]] +; REV-DAG: rev [[R1]], [[R2:r[0-9]+]] +; REV-DAG: lsr [[R1]], [[R1]], #16 +; REV-DAG: lsr [[R2]], [[R0]], #16 +; REV-DAG: orr [[R1]], [[R1]], [[R0]], lsl #16 + +; REVBE-LABEL: bswap48 +; Until PR24879 is fixed, I can't match [[R0:r[0-9]+|lr]] in a +; backreference. Having to stick to just r[0-9]+ for now, which +; is *very* likely to be the register selection :-). +; REVBE-DAG: rev [[R0:r[0-9]+]], [[R0]] +; Need to break DAG block here to stop R1 or R2 clobbering the +; self rev above. 
+; REVBE-NEXT: rev [[R1:r[0-9]+|lr]], [[R2:r[0-9]+|lr]] +; REVBE-DAG: lsr [[R0]], [[R0]], #16 +; REVBE-DAG: lsr [[R0]], [[R1]], #16 +; REVBE-DAG: orr [[R2]], [[R0]], [[R1]], lsl #16 +} + +define i64 @bswap64(i64 %x) #0 { + %1 = tail call i64 @llvm.bswap.i64(i64 %x) + ret i64 %1 +; PRE6-LABEL: bswap64 +; PRE6-DAG: mov [[R0:r[0-9]+|lr]], #65280 +; PRE6-DAG: mov [[R1:r[0-9]+|lr]], #16711680 +; PRE6-NOT: DAG-BREAK! +; PRE6-DAG: and [[R2:r[0-9]+|lr]], [[R0]], [[R3:r[0-9]+|lr]], lsr #8 +; PRE6-DAG: and [[R4:r[0-9]+|lr]], [[R1]], [[R3]], lsl #8 +; PRE6-DAG: orr [[R2]], [[R2]], [[R3]], lsr #24 +; PRE6: orr [[R3]], [[R4]], [[R3]], lsl #24 +; PRE6-NOT: DAG-BREAK! +; PRE6-DAG: and [[R4]], [[R1]], [[R5:r[0-9]+|lr]], lsl #8 +; PRE6-DAG: orr [[R2]], [[R3]], [[R2]] +; PRE6-DAG: and [[R3]], [[R0]], [[R5]], lsr #8 +; PRE6-DAG: orr [[R3]], [[R3]], [[R5]], lsr #24 +; PRE6-DAG: orr [[R5]], [[R4]], [[R5]], lsl #24 +; PRE6-DAG: orr [[R3]], [[R5]], [[R3]] + +; REV-LABEL: bswap64 +; Just check that the two 32-bit words are reversed, not bothered +; so much about register selection here. 
+; REV: rev +; REV: rev +; REV-NOT: rev +} + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i48 @llvm.bswap.i48(i48) +declare i64 @llvm.bswap.i64(i64) Index: test/Transforms/InstCombine/bswap.ll =================================================================== --- test/Transforms/InstCombine/bswap.ll +++ test/Transforms/InstCombine/bswap.ll @@ -1,7 +1,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" ; RUN: opt < %s -instcombine -S | \ -; RUN: grep "call.*llvm.bswap" | count 6 +; RUN: grep "call.*llvm.bswap" | count 7 define i32 @test1(i32 %i) { %tmp1 = lshr i32 %i, 24 ; [#uses=1] @@ -72,3 +72,15 @@ ret i32 %tmp7 } +; PR23863 +define i32 @test7(i32 %x) { + %shl = shl i32 %x, 16 + %shr = lshr i32 %x, 16 + %or = or i32 %shl, %shr + %and2 = shl i32 %or, 8 + %shl3 = and i32 %and2, -16711936 + %and4 = lshr i32 %or, 8 + %shr5 = and i32 %and4, 16711935 + %or6 = or i32 %shl3, %shr5 + ret i32 %or6 +}