diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td --- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td +++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td @@ -158,6 +158,21 @@ let DecoderMethod = "decodeImmShiftOpValue"; } +// Optimize constant materialization with BMASKI32. +def imm_bmask_XFORM : SDNodeXForm<imm, [{ + uint32_t I = N->getZExtValue() + 1; + if (I == 0) + return CurDAG->getTargetConstant(32, SDLoc(N), + N->getValueType(0)); + else + return CurDAG->getTargetConstant(llvm::Log2_32(I), SDLoc(N), + N->getValueType(0)); +}]>; +def imm_bmask : PatLeaf<(imm), [{ + uint32_t I = N->getZExtValue() + 1; + return (llvm::popcount(I) == 1 && I >= 0x20000) || I == 0; +}]>; + def CSKYSymbol : AsmOperandClass { let Name = "CSKYSymbol"; let RenderMethod = "addImmOperands"; @@ -1413,9 +1428,12 @@ } // Constant materialize patterns. -let Predicates = [iHasE2] in +let Predicates = [iHasE2] in { def : Pat<(i32 imm:$imm), (ORI32 (MOVIH32 (uimm32_hi16 imm:$imm)), (uimm32_lo16 imm:$imm))>; + def : Pat<(i32 imm_bmask:$imm), + (BMASKI32 (imm_bmask_XFORM imm_bmask:$imm))>; +} // Other operations. 
let Predicates = [iHasE2] in { diff --git a/llvm/test/CodeGen/CSKY/base-i.ll b/llvm/test/CodeGen/CSKY/base-i.ll --- a/llvm/test/CodeGen/CSKY/base-i.ll +++ b/llvm/test/CodeGen/CSKY/base-i.ll @@ -86,6 +86,35 @@ ret i32 %add } +define i32 @addRI_0x1ffff(i32 %x) { +; CHECK-LABEL: addRI_0x1ffff: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bmaski32 a1, 17 +; CHECK-NEXT: addu16 a0, a1 +; CHECK-NEXT: rts16 +; +; GENERIC-LABEL: addRI_0x1ffff: +; GENERIC: # %bb.0: # %entry +; GENERIC-NEXT: .cfi_def_cfa_offset 0 +; GENERIC-NEXT: subi16 sp, sp, 4 +; GENERIC-NEXT: .cfi_def_cfa_offset 4 +; GENERIC-NEXT: movi16 a1, 0 +; GENERIC-NEXT: lsli16 a1, a1, 24 +; GENERIC-NEXT: movi16 a2, 1 +; GENERIC-NEXT: lsli16 a2, a2, 16 +; GENERIC-NEXT: or16 a2, a1 +; GENERIC-NEXT: movi16 a1, 255 +; GENERIC-NEXT: lsli16 a3, a1, 8 +; GENERIC-NEXT: or16 a3, a2 +; GENERIC-NEXT: or16 a3, a1 +; GENERIC-NEXT: addu16 a0, a0, a3 +; GENERIC-NEXT: addi16 sp, sp, 4 +; GENERIC-NEXT: rts16 +entry: + %add = add nsw i32 %x, 131071 + ret i32 %add +} + define i32 @addRI_X(i32 %x) { ; CHECK-LABEL: addRI_X: ; CHECK: # %bb.0: # %entry @@ -393,8 +422,7 @@ ; CHECK-LABEL: SUB_LONG_I: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: clrc32 -; CHECK-NEXT: movih32 a2, 65535 -; CHECK-NEXT: ori32 a2, a2, 65535 +; CHECK-NEXT: bmaski32 a2, 32 ; CHECK-NEXT: addc16 a0, a2 ; CHECK-NEXT: addc16 a1, a2 ; CHECK-NEXT: rts16 @@ -660,7 +688,7 @@ ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 8 ; GENERIC-NEXT: mov16 a2, a0 -; GENERIC-NEXT: lrw32 a3, [.LCPI29_0] +; GENERIC-NEXT: lrw32 a3, [.LCPI30_0] ; GENERIC-NEXT: mov16 a0, a1 ; GENERIC-NEXT: mov16 a1, a2 ; GENERIC-NEXT: jsr16 a3 @@ -671,7 +699,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI29_0: +; GENERIC-NEXT: .LCPI30_0: ; GENERIC-NEXT: .long __udivsi3 entry: %udiv = udiv i32 %y, %x @@ -693,7 +721,7 @@ ; GENERIC-NEXT: .cfi_offset lr, -4 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 8 -; 
GENERIC-NEXT: lrw32 a2, [.LCPI30_0] +; GENERIC-NEXT: lrw32 a2, [.LCPI31_0] ; GENERIC-NEXT: movi16 a1, 10 ; GENERIC-NEXT: jsr16 a2 ; GENERIC-NEXT: addi16 sp, sp, 4 @@ -703,7 +731,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI30_0: +; GENERIC-NEXT: .LCPI31_0: ; GENERIC-NEXT: .long __udivsi3 entry: %udiv = udiv i32 %x, 10 @@ -734,7 +762,7 @@ ; GENERIC-NEXT: or16 a2, a1 ; GENERIC-NEXT: movi16 a1, 1 ; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: lrw32 a2, [.LCPI31_0] +; GENERIC-NEXT: lrw32 a2, [.LCPI32_0] ; GENERIC-NEXT: jsr16 a2 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload @@ -743,7 +771,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI31_0: +; GENERIC-NEXT: .LCPI32_0: ; GENERIC-NEXT: .long __udivsi3 entry: %udiv = udiv i32 %x, 4097 @@ -779,7 +807,7 @@ ; GENERIC-NEXT: or16 a1, a3 ; GENERIC-NEXT: and16 a2, a1 ; GENERIC-NEXT: and16 a1, a0 -; GENERIC-NEXT: lrw32 a3, [.LCPI32_0] +; GENERIC-NEXT: lrw32 a3, [.LCPI33_0] ; GENERIC-NEXT: mov16 a0, a2 ; GENERIC-NEXT: jsr16 a3 ; GENERIC-NEXT: addi16 sp, sp, 4 @@ -790,7 +818,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI32_0: +; GENERIC-NEXT: .LCPI33_0: ; GENERIC-NEXT: .long __udivsi3 entry: %udiv = udiv i16 %y, %x @@ -854,7 +882,7 @@ ; GENERIC-NEXT: movi16 a1, 255 ; GENERIC-NEXT: and16 a2, a1 ; GENERIC-NEXT: and16 a1, a0 -; GENERIC-NEXT: lrw32 a3, [.LCPI34_0] +; GENERIC-NEXT: lrw32 a3, [.LCPI35_0] ; GENERIC-NEXT: mov16 a0, a2 ; GENERIC-NEXT: jsr16 a3 ; GENERIC-NEXT: addi16 sp, sp, 4 @@ -864,7 +892,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI34_0: +; GENERIC-NEXT: .LCPI35_0: ; GENERIC-NEXT: .long __udivsi3 entry: %udiv = udiv i8 %y, %x @@ -912,7 +940,7 @@ ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 8 ; 
GENERIC-NEXT: mov16 a2, a0 -; GENERIC-NEXT: lrw32 a3, [.LCPI36_0] +; GENERIC-NEXT: lrw32 a3, [.LCPI37_0] ; GENERIC-NEXT: mov16 a0, a1 ; GENERIC-NEXT: mov16 a1, a2 ; GENERIC-NEXT: jsr16 a3 @@ -923,7 +951,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI36_0: +; GENERIC-NEXT: .LCPI37_0: ; GENERIC-NEXT: .long __divsi3 entry: %sdiv = sdiv i32 %y, %x @@ -945,7 +973,7 @@ ; GENERIC-NEXT: .cfi_offset lr, -4 ; GENERIC-NEXT: subi16 sp, sp, 4 ; GENERIC-NEXT: .cfi_def_cfa_offset 8 -; GENERIC-NEXT: lrw32 a2, [.LCPI37_0] +; GENERIC-NEXT: lrw32 a2, [.LCPI38_0] ; GENERIC-NEXT: movi16 a1, 10 ; GENERIC-NEXT: jsr16 a2 ; GENERIC-NEXT: addi16 sp, sp, 4 @@ -955,7 +983,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI37_0: +; GENERIC-NEXT: .LCPI38_0: ; GENERIC-NEXT: .long __divsi3 entry: %sdiv = sdiv i32 %x, 10 @@ -986,7 +1014,7 @@ ; GENERIC-NEXT: or16 a2, a1 ; GENERIC-NEXT: movi16 a1, 1 ; GENERIC-NEXT: or16 a1, a2 -; GENERIC-NEXT: lrw32 a2, [.LCPI38_0] +; GENERIC-NEXT: lrw32 a2, [.LCPI39_0] ; GENERIC-NEXT: jsr16 a2 ; GENERIC-NEXT: addi16 sp, sp, 4 ; GENERIC-NEXT: ld32.w lr, (sp, 0) # 4-byte Folded Reload @@ -995,7 +1023,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI38_0: +; GENERIC-NEXT: .LCPI39_0: ; GENERIC-NEXT: .long __divsi3 entry: %sdiv = sdiv i32 %x, 4097 @@ -1020,7 +1048,7 @@ ; GENERIC-NEXT: .cfi_def_cfa_offset 8 ; GENERIC-NEXT: sexth16 a2, a1 ; GENERIC-NEXT: sexth16 a1, a0 -; GENERIC-NEXT: lrw32 a3, [.LCPI39_0] +; GENERIC-NEXT: lrw32 a3, [.LCPI40_0] ; GENERIC-NEXT: mov16 a0, a2 ; GENERIC-NEXT: jsr16 a3 ; GENERIC-NEXT: addi16 sp, sp, 4 @@ -1030,7 +1058,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI39_0: +; GENERIC-NEXT: .LCPI40_0: ; GENERIC-NEXT: .long __divsi3 entry: %sdiv = sdiv i16 %y, %x @@ -1092,7 +1120,7 @@ ; GENERIC-NEXT: 
.cfi_def_cfa_offset 8 ; GENERIC-NEXT: sextb16 a2, a1 ; GENERIC-NEXT: sextb16 a1, a0 -; GENERIC-NEXT: lrw32 a3, [.LCPI41_0] +; GENERIC-NEXT: lrw32 a3, [.LCPI42_0] ; GENERIC-NEXT: mov16 a0, a2 ; GENERIC-NEXT: jsr16 a3 ; GENERIC-NEXT: addi16 sp, sp, 4 @@ -1102,7 +1130,7 @@ ; GENERIC-NEXT: .p2align 1 ; GENERIC-NEXT: # %bb.1: ; GENERIC-NEXT: .p2align 2, 0x0 -; GENERIC-NEXT: .LCPI41_0: +; GENERIC-NEXT: .LCPI42_0: ; GENERIC-NEXT: .long __divsi3 entry: %sdiv = sdiv i8 %y, %x diff --git a/llvm/test/CodeGen/CSKY/switch.ll b/llvm/test/CodeGen/CSKY/switch.ll --- a/llvm/test/CodeGen/CSKY/switch.ll +++ b/llvm/test/CodeGen/CSKY/switch.ll @@ -17,8 +17,7 @@ ; CHECK-NEXT: movi16 a0, 0 ; CHECK-NEXT: rts16 ; CHECK-NEXT: .LBB0_3: # %otherwise -; CHECK-NEXT: movih32 a0, 65535 -; CHECK-NEXT: ori32 a0, a0, 65535 +; CHECK-NEXT: bmaski32 a0, 32 ; CHECK-NEXT: rts16 ; CHECK-NEXT: .LBB0_4: # %onone ; CHECK-NEXT: movi16 a0, 1 @@ -34,7 +33,7 @@ ; CHECK-NEXT: rts16 ; CHECK-NEXT: .p2align 1 ; CHECK-NEXT: # %bb.8: -; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: .p2align 2, 0x0 ; CHECK-NEXT: .LCPI0_0: ; CHECK-NEXT: .long .LJTI0_0 ; @@ -59,8 +58,7 @@ ; CHECK-PIC-SMALL-NEXT: movi16 a0, 0 ; CHECK-PIC-SMALL-NEXT: br32 .LBB0_8 ; CHECK-PIC-SMALL-NEXT: .LBB0_3: # %otherwise -; CHECK-PIC-SMALL-NEXT: movih32 a0, 65535 -; CHECK-PIC-SMALL-NEXT: ori32 a0, a0, 65535 +; CHECK-PIC-SMALL-NEXT: bmaski32 a0, 32 ; CHECK-PIC-SMALL-NEXT: br32 .LBB0_8 ; CHECK-PIC-SMALL-NEXT: .LBB0_4: # %onone ; CHECK-PIC-SMALL-NEXT: movi16 a0, 1 @@ -79,7 +77,7 @@ ; CHECK-PIC-SMALL-NEXT: rts16 ; CHECK-PIC-SMALL-NEXT: .p2align 1 ; CHECK-PIC-SMALL-NEXT: # %bb.9: -; CHECK-PIC-SMALL-NEXT: .p2align 2 +; CHECK-PIC-SMALL-NEXT: .p2align 2, 0x0 ; CHECK-PIC-SMALL-NEXT: .LCPI0_0: ; CHECK-PIC-SMALL-NEXT: .long _GLOBAL_OFFSET_TABLE_ ; CHECK-PIC-SMALL-NEXT: .LCPI0_1: @@ -106,8 +104,7 @@ ; CHECK-PIC-LARGE-NEXT: movi16 a0, 0 ; CHECK-PIC-LARGE-NEXT: br32 .LBB0_8 ; CHECK-PIC-LARGE-NEXT: .LBB0_3: # %otherwise -; CHECK-PIC-LARGE-NEXT: movih32 a0, 65535 -; 
CHECK-PIC-LARGE-NEXT: ori32 a0, a0, 65535 +; CHECK-PIC-LARGE-NEXT: bmaski32 a0, 32 ; CHECK-PIC-LARGE-NEXT: br32 .LBB0_8 ; CHECK-PIC-LARGE-NEXT: .LBB0_4: # %onone ; CHECK-PIC-LARGE-NEXT: movi16 a0, 1 @@ -126,7 +123,7 @@ ; CHECK-PIC-LARGE-NEXT: rts16 ; CHECK-PIC-LARGE-NEXT: .p2align 1 ; CHECK-PIC-LARGE-NEXT: # %bb.9: -; CHECK-PIC-LARGE-NEXT: .p2align 2 +; CHECK-PIC-LARGE-NEXT: .p2align 2, 0x0 ; CHECK-PIC-LARGE-NEXT: .LCPI0_0: ; CHECK-PIC-LARGE-NEXT: .long _GLOBAL_OFFSET_TABLE_ ; CHECK-PIC-LARGE-NEXT: .LCPI0_1: