diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td @@ -146,28 +146,62 @@ let DecoderMethod = "decodeImmShiftOpValue"; } -// Optimize (or x, imm) to (BSETI x, log2(imm)). We should exclude the -// case can be opimized to (ORI32/ORI16 x, imm). -def uimm_bseti_1_XFORM : SDNodeXFormgetZExtValue(); - return CurDAG->getTargetConstant(llvm::Log2_32(I), SDLoc(N), + return CurDAG->getTargetConstant(llvm::countr_zero(I), SDLoc(N), N->getValueType(0)); }]>; -def uimm_bseti_1 : PatLeaf<(imm), [{ +def HighestSetBit : SDNodeXFormgetZExtValue(); - return llvm::popcount(I) == 1 && I > 0xffff; + return CurDAG->getTargetConstant(31 - llvm::countl_zero(I), + SDLoc(N), N->getValueType(0)); }]>; - -// Optimize (and x, imm) to (BCLRI x, log2(~imm)). We should exclude the -// case can be opimized to (ANDNI x, ~imm). -def uimm_bclri_1_XFORM : SDNodeXFormgetZExtValue(); - return CurDAG->getTargetConstant(llvm::Log2_32(I), SDLoc(N), + return CurDAG->getTargetConstant(llvm::countr_zero(I), SDLoc(N), N->getValueType(0)); }]>; -def uimm_bclri_1 : PatLeaf<(imm), [{ +def HighestZeroBit : SDNodeXFormgetZExtValue(); + return CurDAG->getTargetConstant(31 - llvm::countl_zero(I), + SDLoc(N), N->getValueType(0)); +}]>; + +// Optimize (or x, imm) to (BSETI x, log2(imm)). We should exclude the +// case that can be optimized to (ORI32/ORI16 x, imm). +def imm32_1_pop_bit : PatLeaf<(imm), [{ + uint32_t I = N->getZExtValue(); + return llvm::popcount(I) == 1 && !isUInt<16>(I); +}]>; +// Optimize (or x, imm) to (BSETI (BSETI x, i0), i1), in which +// imm = (1 << i0) | (1 << i1), and imm has to be composed via +// a MOVIH32+ORI32 pair. +def imm32_2_pop_bits : PatLeaf<(imm), [{ + if (!N->hasOneUse()) + return false; + uint32_t I = N->getZExtValue(); + // Reject immediates that can be composed via a single MOVIH32 or directly + // encoded into the IMM16 field of ORI32. 
+ if (isUInt<16>(I) || isShiftedUInt<16, 16>(I)) + return false; + return llvm::popcount(I) == 2; +}]>; +// Optimize (and x, imm) to (BCLRI (BCLRI x, i0), i1), in which +// imm = ~((1 << i0) | (1 << i1)). +def imm32_30_pop_bits : PatLeaf<(imm), [{ + if (!N->hasOneUse()) + return false; + uint32_t I = ~N->getZExtValue(); + // Reject immediates directly encodable in the IMM12 field of ANDNI32. + if (isUInt<12>(I)) + return false; + return llvm::popcount(I) == 2; +}]>; +// Optimize (and x, imm) to (BCLRI x, log2(~imm)). We should exclude the +// case that can be optimized to (ANDNI x, ~imm). +def imm32_31_pop_bits : PatLeaf<(imm), [{ uint32_t I = ~N->getZExtValue(); - return llvm::popcount(I) == 1 && I > 0xfff; + return llvm::popcount(I) == 1 && !isUInt<12>(I); }]>; def CSKYSymbol : AsmOperandClass { @@ -1316,10 +1350,16 @@ // Bit operations. let Predicates = [iHasE2] in { - def : Pat<(or GPR:$rs, uimm_bseti_1:$imm), - (BSETI32 GPR:$rs, (uimm_bseti_1_XFORM uimm_bseti_1:$imm))>; - def : Pat<(and GPR:$rs, uimm_bclri_1:$imm), - (BCLRI32 GPR:$rs, (uimm_bclri_1_XFORM uimm_bclri_1:$imm))>; + def : Pat<(or GPR:$rs, imm32_1_pop_bit:$imm), + (BSETI32 GPR:$rs, (LowestSetBit imm32_1_pop_bit:$imm))>; + def : Pat<(or GPR:$rs, imm32_2_pop_bits:$imm), + (BSETI32 (BSETI32 GPR:$rs, (LowestSetBit imm32_2_pop_bits:$imm)), + (HighestSetBit imm32_2_pop_bits:$imm))>; + def : Pat<(and GPR:$rs, imm32_30_pop_bits:$imm), + (BCLRI32 (BCLRI32 GPR:$rs, (LowestZeroBit imm32_30_pop_bits:$imm)), + (HighestZeroBit imm32_30_pop_bits:$imm))>; + def : Pat<(and GPR:$rs, imm32_31_pop_bits:$imm), + (BCLRI32 GPR:$rs, (LowestZeroBit imm32_31_pop_bits:$imm))>; } // Other operations. 
diff --git a/llvm/test/CodeGen/CSKY/bseti_bclri.ll b/llvm/test/CodeGen/CSKY/bseti_bclri.ll --- a/llvm/test/CodeGen/CSKY/bseti_bclri.ll +++ b/llvm/test/CodeGen/CSKY/bseti_bclri.ll @@ -50,14 +50,28 @@ define i32 @test_or_65540(i32 noundef %0) { ; CHECK-LABEL: test_or_65540: ; CHECK: # %bb.0: -; CHECK-NEXT: movih32 a1, 1 -; CHECK-NEXT: ori32 a1, a1, 4 -; CHECK-NEXT: or16 a0, a1 +; CHECK-NEXT: bseti16 a0, 2 +; CHECK-NEXT: bseti16 a0, 16 ; CHECK-NEXT: rts16 %2 = or i32 %0, 65540 ret i32 %2 } +define i32 @test_or_65540_twice(i32 noundef %0, i32 noundef %1) { +; CHECK-LABEL: test_or_65540_twice: +; CHECK: # %bb.0: +; CHECK-NEXT: movih32 a2, 1 +; CHECK-NEXT: ori32 a2, a2, 4 +; CHECK-NEXT: or16 a0, a2 +; CHECK-NEXT: or16 a1, a2 +; CHECK-NEXT: addu16 a0, a1 +; CHECK-NEXT: rts16 + %3 = or i32 %0, 65540 + %4 = or i32 %1, 65540 + %5 = add i32 %3, %4 + ret i32 %5 +} + define i32 @test_andnot_128(i32 noundef %0) { ; CHECK-LABEL: test_andnot_128: ; CHECK: # %bb.0: @@ -108,10 +122,24 @@ define i32 @test_andnot_65540(i32 noundef %0) { ; CHECK-LABEL: test_andnot_65540: ; CHECK: # %bb.0: -; CHECK-NEXT: movih32 a1, 65534 -; CHECK-NEXT: ori32 a1, a1, 65531 -; CHECK-NEXT: and16 a0, a1 +; CHECK-NEXT: bclri16 a0, 2 +; CHECK-NEXT: bclri16 a0, 16 ; CHECK-NEXT: rts16 %2 = and i32 %0, -65541 ret i32 %2 } + +define i32 @test_andnot_65540_twice(i32 noundef %0, i32 noundef %1) { +; CHECK-LABEL: test_andnot_65540_twice: +; CHECK: # %bb.0: +; CHECK-NEXT: movih32 a2, 65534 +; CHECK-NEXT: ori32 a2, a2, 65531 +; CHECK-NEXT: and16 a0, a2 +; CHECK-NEXT: and16 a1, a2 +; CHECK-NEXT: subu16 a0, a1 +; CHECK-NEXT: rts16 + %3 = and i32 %0, -65541 + %4 = and i32 %1, -65541 + %5 = sub i32 %3, %4 + ret i32 %5 +}