diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -105,10 +105,20 @@ MachineMemOperand::Flags Flags, bool *Fast) const override; - // Block s/udiv lowering for now - bool isIntDivCheap(EVT VT, AttributeList Attr) const override { return true; } - + /// Target Optimization { + + // SX-Aurora VE's s/udiv is 5-9 times slower than multiply. + bool isIntDivCheap(EVT, AttributeList) const override { return false; } + // VE doesn't have a rem instruction. + bool hasStandaloneRem(EVT) const override { return false; } + // VE LDZ returns 64 for a zero input, so ctlz needs no zero check. + bool isCheapToSpeculateCtlz() const override { return true; } + // The VE LDZ instruction is fast. + bool isCtlzFast() const override { return true; } + // VE has an NND instruction. bool hasAndNot(SDValue Y) const override; + + /// } Target Optimization }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -684,14 +684,18 @@ setOperationAction(ISD::ROTL, IntVT, Expand); setOperationAction(ISD::ROTR, IntVT, Expand); - // Use isel patterns for i32 and i64 + // VE has a 64-bit instruction which works as an i64 BSWAP operation. This + // instruction also works as an i32 BSWAP operation with an additional + // parameter. Use isel patterns to lower BSWAP. setOperationAction(ISD::BSWAP, IntVT, Legal); - setOperationAction(ISD::CTLZ, IntVT, Legal); - setOperationAction(ISD::CTPOP, IntVT, Legal); - // Use isel patterns for i64, Promote i32 + // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP + // operations. Use isel patterns for i64, promote for i32. LegalizeAction Act = (IntVT == MVT::i32) ? 
Promote : Legal; setOperationAction(ISD::BITREVERSE, IntVT, Act); + setOperationAction(ISD::CTLZ, IntVT, Act); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act); + setOperationAction(ISD::CTPOP, IntVT, Act); } /// } Int Ops diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -1203,7 +1203,10 @@ defm MRG : RRMRGm<"mrg", 0x56, I64, i64>; // Section 8.5.7 - LDZ (Leading Zero Count) -defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>; +def ctlz_pat : PatFrags<(ops node:$src), + [(ctlz node:$src), + (ctlz_zero_undef node:$src)]>; +defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz_pat>; // Section 8.5.8 - PCNT (Population Count) defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>; @@ -1213,6 +1216,16 @@ // Section 8.5.10 - BSWP (Byte Swap) defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>; +def : Pat<(i64 (bswap i64:$src)), + (BSWPri $src, 0)>; +def : Pat<(i64 (bswap (i64 mimm:$src))), + (BSWPmi (MIMM $src), 0)>; +def : Pat<(i32 (bswap i32:$src)), + (EXTRACT_SUBREG + (BSWPri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), + sub_i32)>; +def : Pat<(i32 (bswap (i32 mimm:$src))), + (EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>; // Section 8.5.11 - CMOV (Conditional Move) let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>; @@ -1982,19 +1995,6 @@ (EXTRACT_SUBREG (SLLri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $op, sub_i32), 32), sub_f32)>; -// Bits operations pattern matchings. 
-def : Pat<(i32 (ctpop i32:$src)), - (EXTRACT_SUBREG (PCNTr (ANDrm (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), !add(32, 64))), sub_i32)>; -def : Pat<(i32 (ctlz i32:$src)), - (EXTRACT_SUBREG (LDZr (SLLri (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), 32)), sub_i32)>; -def : Pat<(i64 (bswap i64:$src)), - (BSWPri $src, 0)>; -def : Pat<(i32 (bswap i32:$src)), - (EXTRACT_SUBREG (BSWPri (INSERT_SUBREG - (i64 (IMPLICIT_DEF)), $src, sub_i32), 1), sub_i32)>; - // Several special pattern matches to optimize code def : Pat<(i32 (and i32:$lhs, 0xff)), diff --git a/llvm/test/CodeGen/VE/bitreverse.ll b/llvm/test/CodeGen/VE/bitreverse.ll --- a/llvm/test/CodeGen/VE/bitreverse.ll +++ b/llvm/test/CodeGen/VE/bitreverse.ll @@ -1,7 +1,24 @@ ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s -define i64 @func1(i64 %p) { -; CHECK-LABEL: func1: +declare i128 @llvm.bitreverse.i128(i128) +declare i64 @llvm.bitreverse.i64(i64) +declare i32 @llvm.bitreverse.i32(i32) +declare i16 @llvm.bitreverse.i16(i16) +declare i8 @llvm.bitreverse.i8(i8) + +define i128 @func128(i128 %p) { +; CHECK-LABEL: func128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: brv %s2, %s1 +; CHECK-NEXT: brv %s1, %s0 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.bitreverse.i128(i128 %p) + ret i128 %r +} + +define i64 @func64(i64 %p) { +; CHECK-LABEL: func64: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -9,10 +26,18 @@ ret i64 %r } -declare i64 @llvm.bitreverse.i64(i64) +define signext i32 @func32s(i32 signext %p) { +; CHECK-LABEL: func32s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: sra.l %s0, %s0, 32 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.bitreverse.i32(i32 %p) + ret i32 %r +} -define i32 @func2(i32 %p) { -; CHECK-LABEL: func2: +define zeroext i32 @func32z(i32 zeroext %p) { +; CHECK-LABEL: func32z: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: srl 
%s0, %s0, 32 @@ -21,10 +46,8 @@ ret i32 %r } -declare i32 @llvm.bitreverse.i32(i32) - -define signext i16 @func3(i16 signext %p) { -; CHECK-LABEL: func3: +define signext i16 @func16s(i16 signext %p) { +; CHECK-LABEL: func16s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: sra.l %s0, %s0, 48 @@ -33,10 +56,18 @@ ret i16 %r } -declare i16 @llvm.bitreverse.i16(i16) +define zeroext i16 @func16z(i16 zeroext %p) { +; CHECK-LABEL: func16z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 48 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.bitreverse.i16(i16 %p) + ret i16 %r +} -define signext i8 @func4(i8 signext %p) { -; CHECK-LABEL: func4: +define signext i8 @func8s(i8 signext %p) { +; CHECK-LABEL: func8s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: brv %s0, %s0 ; CHECK-NEXT: sra.l %s0, %s0, 56 @@ -45,44 +76,86 @@ ret i8 %r } -declare i8 @llvm.bitreverse.i8(i8) - -define i64 @func5(i64 %p) { -; CHECK-LABEL: func5: +define zeroext i8 @func8z(i8 zeroext %p) { +; CHECK-LABEL: func8z: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: brv %s0, %s0 +; CHECK-NEXT: srl %s0, %s0, 56 ; CHECK-NEXT: or %s11, 0, %s9 - %r = tail call i64 @llvm.bitreverse.i64(i64 %p) + %r = tail call i8 @llvm.bitreverse.i8(i8 %p) + ret i8 %r +} + +define i128 @func128i() { +; CHECK-LABEL: func128i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: lea.sl %s1, -65536 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.bitreverse.i128(i128 65535) + ret i128 %r +} + +define i64 @func64i() { +; CHECK-LABEL: func64i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea.sl %s0, -65536 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i64 @llvm.bitreverse.i64(i64 65535) ret i64 %r } -define i32 @func6(i32 %p) { -; CHECK-LABEL: func6: +define signext i32 @func32is() { +; CHECK-LABEL: func32is: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: brv %s0, %s0 -; CHECK-NEXT: srl %s0, %s0, 32 +; CHECK-NEXT: lea %s0, -65536 ; CHECK-NEXT: or %s11, 0, 
%s9 - %r = tail call i32 @llvm.bitreverse.i32(i32 %p) + %r = tail call i32 @llvm.bitreverse.i32(i32 65535) ret i32 %r } -define zeroext i16 @func7(i16 zeroext %p) { -; CHECK-LABEL: func7: +define zeroext i32 @func32iz() { +; CHECK-LABEL: func32iz: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: brv %s0, %s0 -; CHECK-NEXT: srl %s0, %s0, 48 +; CHECK-NEXT: lea %s0, -65536 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: or %s11, 0, %s9 - %r = tail call i16 @llvm.bitreverse.i16(i16 %p) + %r = tail call i32 @llvm.bitreverse.i32(i32 65535) + ret i32 %r +} + +define signext i16 @func16is() { +; CHECK-LABEL: func16is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, -256 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.bitreverse.i16(i16 255) ret i16 %r } -define zeroext i8 @func8(i8 zeroext %p) { -; CHECK-LABEL: func8: +define zeroext i16 @func16iz() { +; CHECK-LABEL: func16iz: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: brv %s0, %s0 -; CHECK-NEXT: srl %s0, %s0, 56 +; CHECK-NEXT: lea %s0, 65280 ; CHECK-NEXT: or %s11, 0, %s9 - %r = tail call i8 @llvm.bitreverse.i8(i8 %p) + %r = tail call i16 @llvm.bitreverse.i16(i16 255) + ret i16 %r +} + +define signext i8 @func8is() { +; CHECK-LABEL: func8is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 15, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.bitreverse.i8(i8 240) ret i8 %r } +define zeroext i8 @func8iz() { +; CHECK-LABEL: func8iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 15, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.bitreverse.i8(i8 240) + ret i8 %r +} diff --git a/llvm/test/CodeGen/VE/bswap.ll b/llvm/test/CodeGen/VE/bswap.ll --- a/llvm/test/CodeGen/VE/bswap.ll +++ b/llvm/test/CodeGen/VE/bswap.ll @@ -1,7 +1,23 @@ ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s -define i64 @func1(i64 %p) { -; CHECK-LABEL: func1: +declare i128 @llvm.bswap.i128(i128) +declare i64 @llvm.bswap.i64(i64) +declare i32 @llvm.bswap.i32(i32) +declare i16 @llvm.bswap.i16(i16) + +define 
i128 @func128(i128 %p) { +; CHECK-LABEL: func128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: bswp %s2, %s1, 0 +; CHECK-NEXT: bswp %s1, %s0, 0 +; CHECK-NEXT: or %s0, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.bswap.i128(i128 %p) + ret i128 %r +} + +define i64 @func64(i64 %p) { +; CHECK-LABEL: func64: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: bswp %s0, %s0, 0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -9,21 +25,28 @@ ret i64 %r } -declare i64 @llvm.bswap.i64(i64) - -define i32 @func2(i32 %p) { -; CHECK-LABEL: func2: +define signext i32 @func32s(i32 signext %p) { +; CHECK-LABEL: func32s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: bswp %s0, %s0, 1 +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.bswap.i32(i32 %p) ret i32 %r } -declare i32 @llvm.bswap.i32(i32) +define zeroext i32 @func32z(i32 zeroext %p) { +; CHECK-LABEL: func32z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: bswp %s0, %s0, 1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.bswap.i32(i32 %p) + ret i32 %r +} -define signext i16 @func3(i16 signext %p) { -; CHECK-LABEL: func3: +define signext i16 @func16s(i16 signext %p) { +; CHECK-LABEL: func16s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: bswp %s0, %s0, 1 ; CHECK-NEXT: and %s0, %s0, (32)0 @@ -35,34 +58,70 @@ ret i16 %r } -declare i16 @llvm.bswap.i16(i16) +define zeroext i16 @func16z(i16 zeroext %p) { +; CHECK-LABEL: func16z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: bswp %s0, %s0, 1 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: srl %s0, %s0, 16 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.bswap.i16(i16 %p) + ret i16 %r +} -define i64 @func4(i64 %p) { -; CHECK-LABEL: func4: +define i128 @func128i() { +; CHECK-LABEL: func128i: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: bswp %s0, %s0, 0 +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: lea.sl %s1, -16777216 ; CHECK-NEXT: or %s11, 0, %s9 - %r = tail call i64 
@llvm.bswap.i64(i64 %p) + %r = tail call i128 @llvm.bswap.i128(i128 255) + ret i128 %r +} + +define i64 @func64i() { +; CHECK-LABEL: func64i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea.sl %s0, -16777216 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i64 @llvm.bswap.i64(i64 255) ret i64 %r } -define i32 @func5(i32 %p) { -; CHECK-LABEL: func5: +define signext i32 @func32si() { +; CHECK-LABEL: func32si: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: bswp %s0, %s0, 1 +; CHECK-NEXT: lea %s0, -16777216 ; CHECK-NEXT: or %s11, 0, %s9 - %r = tail call i32 @llvm.bswap.i32(i32 %p) + %r = tail call i32 @llvm.bswap.i32(i32 255) ret i32 %r } -define zeroext i16 @func6(i16 zeroext %p) { -; CHECK-LABEL: func6: +define zeroext i32 @func32zi() { +; CHECK-LABEL: func32zi: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: bswp %s0, %s0, 1 +; CHECK-NEXT: lea %s0, -16777216 ; CHECK-NEXT: and %s0, %s0, (32)0 -; CHECK-NEXT: srl %s0, %s0, 16 -; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: or %s11, 0, %s9 - %r = tail call i16 @llvm.bswap.i16(i16 %p) + %r = tail call i32 @llvm.bswap.i32(i32 255) + ret i32 %r +} + +define signext i16 @func16si() { +; CHECK-LABEL: func16si: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, -256 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.bswap.i16(i16 255) + ret i16 %r +} + +define zeroext i16 @func16zi() { +; CHECK-LABEL: func16zi: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 65280 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.bswap.i16(i16 255) ret i16 %r } diff --git a/llvm/test/CodeGen/VE/ctlz.ll b/llvm/test/CodeGen/VE/ctlz.ll --- a/llvm/test/CodeGen/VE/ctlz.ll +++ b/llvm/test/CodeGen/VE/ctlz.ll @@ -1,7 +1,28 @@ ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s -define i64 @func1(i64 %p) { -; CHECK-LABEL: func1: +declare i128 @llvm.ctlz.i128(i128, i1) +declare i64 @llvm.ctlz.i64(i64, i1) +declare i32 @llvm.ctlz.i32(i32, i1) +declare i16 @llvm.ctlz.i16(i16, i1) +declare i8 @llvm.ctlz.i8(i8, i1) + +define i128 
@func128(i128 %p){ +; CHECK-LABEL: func128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, %s2 +; CHECK-NEXT: ldz %s1, %s1 +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, 64(, %s0) +; CHECK-NEXT: cmov.l.ne %s0, %s1, %s3 +; CHECK-NEXT: or %s1, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.ctlz.i128(i128 %p, i1 true) + ret i128 %r +} + +define i64 @func64(i64 %p) { +; CHECK-LABEL: func64: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: ldz %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -9,45 +30,245 @@ ret i64 %r } -declare i64 @llvm.ctlz.i64(i64, i1) - -define i32 @func2(i32 %p) { -; CHECK-LABEL: func2: +define signext i32 @func32s(i32 signext %p) { +; CHECK-LABEL: func32s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 -; CHECK-NEXT: sll %s0, %s0, 32 +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true) ret i32 %r } -declare i32 @llvm.ctlz.i32(i32, i1) +define zeroext i32 @func32z(i32 zeroext %p) { +; CHECK-LABEL: func32z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true) + ret i32 %r +} -define i16 @func3(i16 %p) { -; CHECK-LABEL: func3: +define signext i16 @func16s(i16 signext %p) { +; CHECK-LABEL: func16s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: and %s0, %s0, (48)0 -; CHECK-NEXT: sll %s0, %s0, 32 ; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) ; CHECK-NEXT: adds.w.sx %s0, -16, %s0 +; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 true) ret i16 %r } -declare i16 @llvm.ctlz.i16(i16, i1) +define zeroext i16 @func16z(i16 zeroext %p) { +; CHECK-LABEL: func16z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldz 
%s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: adds.w.sx %s0, -16, %s0 +; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 true) + ret i16 %r +} -define i8 @func4(i8 %p) { -; CHECK-LABEL: func4: +define signext i8 @func8s(i8 signext %p) { +; CHECK-LABEL: func8s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: and %s0, %s0, (56)0 -; CHECK-NEXT: sll %s0, %s0, 32 ; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) ; CHECK-NEXT: adds.w.sx %s0, -24, %s0 +; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 true) ret i8 %r } -declare i8 @llvm.ctlz.i8(i8, i1) +define zeroext i8 @func8z(i8 zeroext %p) { +; CHECK-LABEL: func8z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: adds.w.sx %s0, -24, %s0 +; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 true) + ret i8 %r +} + +define i128 @func128i(){ +; CHECK-LABEL: func128i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 112 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.ctlz.i128(i128 65535, i1 true) + ret i128 %r +} + +define i64 @func64i() { +; CHECK-LABEL: func64i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 48, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i64 @llvm.ctlz.i64(i64 65535, i1 true) + ret i64 %r +} + +define signext i32 @func32is() { +; CHECK-LABEL: func32is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 16, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.ctlz.i32(i32 65535, i1 true) + ret i32 %r +} + +define zeroext i32 @func32iz() { +; CHECK-LABEL: func32iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 16, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.ctlz.i32(i32 65535, i1 true) + ret i32 %r +} + +define signext i16 @func16is() { +; CHECK-LABEL: func16is: +; 
CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.ctlz.i16(i16 255, i1 true) + ret i16 %r +} + +define zeroext i16 @func16iz() { +; CHECK-LABEL: func16iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.ctlz.i16(i16 255, i1 true) + ret i16 %r +} + +define signext i8 @func8is() { +; CHECK-LABEL: func8is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.ctlz.i8(i8 255, i1 true) + ret i8 %r +} + +define zeroext i8 @func8iz() { +; CHECK-LABEL: func8iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.ctlz.i8(i8 255, i1 true) + ret i8 %r +} + +define i128 @func128x(i128 %p){ +; CHECK-LABEL: func128x: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s1, %s2 +; CHECK-NEXT: ldz %s1, %s1 +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, 64(, %s0) +; CHECK-NEXT: cmov.l.ne %s0, %s1, %s3 +; CHECK-NEXT: or %s1, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.ctlz.i128(i128 %p, i1 false) + ret i128 %r +} + +define i64 @func64x(i64 %p) { +; CHECK-LABEL: func64x: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i64 @llvm.ctlz.i64(i64 %p, i1 false) + ret i64 %r +} + +define signext i32 @func32sx(i32 signext %p) { +; CHECK-LABEL: func32sx: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 false) + ret i32 %r +} + +define zeroext i32 @func32zx(i32 zeroext %p) { +; CHECK-LABEL: func32zx: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: 
or %s11, 0, %s9 + %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 false) + ret i32 %r +} + +define signext i16 @func16sx(i16 signext %p) { +; CHECK-LABEL: func16sx: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: adds.w.sx %s0, -16, %s0 +; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 false) + ret i16 %r +} + +define zeroext i16 @func16zx(i16 zeroext %p) { +; CHECK-LABEL: func16zx: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: adds.w.sx %s0, -16, %s0 +; CHECK-NEXT: and %s0, %s0, (48)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 false) + ret i16 %r +} + +define signext i8 @func8sx(i8 signext %p) { +; CHECK-LABEL: func8sx: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: adds.w.sx %s0, -24, %s0 +; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 false) + ret i8 %r +} + +define zeroext i8 @func8zx(i8 zeroext %p) { +; CHECK-LABEL: func8zx: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: ldz %s0, %s0 +; CHECK-NEXT: lea %s0, -32(, %s0) +; CHECK-NEXT: adds.w.sx %s0, -24, %s0 +; CHECK-NEXT: and %s0, %s0, (56)0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 false) + ret i8 %r +} diff --git a/llvm/test/CodeGen/VE/ctpop.ll b/llvm/test/CodeGen/VE/ctpop.ll --- a/llvm/test/CodeGen/VE/ctpop.ll +++ b/llvm/test/CodeGen/VE/ctpop.ll @@ -1,7 +1,25 @@ ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s -define i64 @func1(i64 %p) { -; CHECK-LABEL: func1: +declare i128 @llvm.ctpop.i128(i128) +declare i64 @llvm.ctpop.i64(i64) +declare i32 @llvm.ctpop.i32(i32) +declare i16 @llvm.ctpop.i16(i16) +declare i8 @llvm.ctpop.i8(i8) + +define i128 @func128(i128 %p) { +; 
CHECK-LABEL: func128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: pcnt %s1, %s1 +; CHECK-NEXT: pcnt %s0, %s0 +; CHECK-NEXT: adds.l %s0, %s0, %s1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.ctpop.i128(i128 %p) + ret i128 %r +} + +define i64 @func64(i64 %p) { +; CHECK-LABEL: func64: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: pcnt %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -9,12 +27,9 @@ ret i64 %r } -declare i64 @llvm.ctpop.i64(i64 %p) - -define i32 @func2(i32 %p) { -; CHECK-LABEL: func2: +define signext i32 @func32s(i32 signext %p) { +; CHECK-LABEL: func32s: ; CHECK: .LBB{{[0-9]+}}_2: -; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: and %s0, %s0, (32)0 ; CHECK-NEXT: pcnt %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 @@ -22,10 +37,17 @@ ret i32 %r } -declare i32 @llvm.ctpop.i32(i32 %p) +define zeroext i32 @func32z(i32 zeroext %p) { +; CHECK-LABEL: func32z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: pcnt %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.ctpop.i32(i32 %p) + ret i32 %r +} -define i16 @func3(i16 %p) { -; CHECK-LABEL: func3: +define signext i16 @func16s(i16 signext %p) { +; CHECK-LABEL: func16s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: and %s0, %s0, (48)0 ; CHECK-NEXT: pcnt %s0, %s0 @@ -34,10 +56,17 @@ ret i16 %r } -declare i16 @llvm.ctpop.i16(i16 %p) +define zeroext i16 @func16z(i16 zeroext %p) { +; CHECK-LABEL: func16z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: pcnt %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.ctpop.i16(i16 %p) + ret i16 %r +} -define i8 @func4(i8 %p) { -; CHECK-LABEL: func4: +define signext i8 @func8s(i8 signext %p) { +; CHECK-LABEL: func8s: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: and %s0, %s0, (56)0 ; CHECK-NEXT: pcnt %s0, %s0 @@ -46,4 +75,84 @@ ret i8 %r } -declare i8 @llvm.ctpop.i8(i8) +define zeroext i8 @func8z(i8 zeroext %p) { +; CHECK-LABEL: func8z: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: pcnt %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail 
call i8 @llvm.ctpop.i8(i8 %p) + ret i8 %r +} + +define i128 @func128i() { +; CHECK-LABEL: func128i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 16, (0)1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.ctpop.i128(i128 65535) + ret i128 %r +} + +define i64 @func64i() { +; CHECK-LABEL: func64i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 16, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i64 @llvm.ctpop.i64(i64 65535) + ret i64 %r +} + +define signext i32 @func32is() { +; CHECK-LABEL: func32is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 16, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.ctpop.i32(i32 65535) + ret i32 %r +} + +define zeroext i32 @func32iz() { +; CHECK-LABEL: func32iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 16, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.ctpop.i32(i32 65535) + ret i32 %r +} + +define signext i16 @func16is() { +; CHECK-LABEL: func16is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 16, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.ctpop.i16(i16 65535) + ret i16 %r +} + +define zeroext i16 @func16iz() { +; CHECK-LABEL: func16iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 16, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.ctpop.i16(i16 65535) + ret i16 %r +} + +define signext i8 @func8is() { +; CHECK-LABEL: func8is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.ctpop.i8(i8 255) + ret i8 %r +} + +define zeroext i8 @func8iz() { +; CHECK-LABEL: func8iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.ctpop.i8(i8 255) + ret i8 %r +} diff --git a/llvm/test/CodeGen/VE/cttz.ll b/llvm/test/CodeGen/VE/cttz.ll --- a/llvm/test/CodeGen/VE/cttz.ll +++ b/llvm/test/CodeGen/VE/cttz.ll @@ -1,7 +1,32 @@ ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s 
-define i64 @func1(i64 %p) { -; CHECK-LABEL: func1: +declare i128 @llvm.cttz.i128(i128, i1) +declare i64 @llvm.cttz.i64(i64, i1) +declare i32 @llvm.cttz.i32(i32, i1) +declare i16 @llvm.cttz.i16(i16, i1) +declare i8 @llvm.cttz.i8(i8, i1) + +define i128 @func128(i128 %p) { +; CHECK-LABEL: func128: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s2, 0, (0)1 +; CHECK-NEXT: cmps.l %s3, %s0, %s2 +; CHECK-NEXT: lea %s4, -1(, %s0) +; CHECK-NEXT: nnd %s0, %s0, %s4 +; CHECK-NEXT: pcnt %s4, %s0 +; CHECK-NEXT: lea %s0, -1(, %s1) +; CHECK-NEXT: nnd %s0, %s1, %s0 +; CHECK-NEXT: pcnt %s0, %s0 +; CHECK-NEXT: lea %s0, 64(, %s0) +; CHECK-NEXT: cmov.l.ne %s0, %s4, %s3 +; CHECK-NEXT: or %s1, 0, %s2 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 @llvm.cttz.i128(i128 %p, i1 true) + ret i128 %r +} + +define i64 @func64(i64 %p) { +; CHECK-LABEL: func64: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: lea %s1, -1(, %s0) ; CHECK-NEXT: nnd %s0, %s0, %s1 @@ -11,52 +36,159 @@ ret i64 %r } -declare i64 @llvm.cttz.i64(i64, i1) +define signext i32 @func32s(i32 signext %p) { +; CHECK-LABEL: func32s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, -1, %s0 +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: pcnt %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.cttz.i32(i32 %p, i1 true) + ret i32 %r +} -define i32 @func2(i32 %p) { -; CHECK-LABEL: func2: +define zeroext i32 @func32z(i32 zeroext %p) { +; CHECK-LABEL: func32z: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s1, -1, %s0 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: pcnt %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i32 @llvm.cttz.i32(i32 %p, i1 true) ret i32 %r } -declare i32 @llvm.cttz.i32(i32, i1) +define signext i16 @func16s(i16 signext %p) { +; 
CHECK-LABEL: func16s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, -1, %s0 +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: pcnt %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.cttz.i16(i16 %p, i1 true) + ret i16 %r +} -define i16 @func3(i16 %p) { -; CHECK-LABEL: func3: +define zeroext i16 @func16z(i16 zeroext %p) { +; CHECK-LABEL: func16z: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s1, -1, %s0 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: pcnt %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i16 @llvm.cttz.i16(i16 %p, i1 true) ret i16 %r } -declare i16 @llvm.cttz.i16(i16, i1) +define signext i8 @func8s(i8 signext %p) { +; CHECK-LABEL: func8s: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 +; CHECK-NEXT: adds.w.sx %s1, -1, %s0 +; CHECK-NEXT: xor %s0, -1, %s0 +; CHECK-NEXT: and %s0, %s0, %s1 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 +; CHECK-NEXT: pcnt %s0, %s0 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.cttz.i8(i8 %p, i1 true) + ret i8 %r +} -define i8 @func4(i8 %p) { -; CHECK-LABEL: func4: +define zeroext i8 @func8z(i8 zeroext %p) { +; CHECK-LABEL: func8z: ; CHECK: .LBB{{[0-9]+}}_2: ; CHECK-NEXT: adds.w.sx %s0, %s0, (0)1 ; CHECK-NEXT: adds.w.sx %s1, -1, %s0 ; CHECK-NEXT: xor %s0, -1, %s0 ; CHECK-NEXT: and %s0, %s0, %s1 -; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: adds.w.zx %s0, %s0, (0)1 ; CHECK-NEXT: pcnt %s0, %s0 ; CHECK-NEXT: or %s11, 0, %s9 %r = tail call i8 @llvm.cttz.i8(i8 %p, i1 true) ret i8 %r } -declare i8 @llvm.cttz.i8(i8, i1) +define i128 @func128i() { +; CHECK-LABEL: func128i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s1, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i128 
@llvm.cttz.i128(i128 65280, i1 true) + ret i128 %r +} + +define i64 @func64i() { +; CHECK-LABEL: func64i: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i64 @llvm.cttz.i64(i64 65280, i1 true) + ret i64 %r +} + +define signext i32 @func32is() { +; CHECK-LABEL: func32is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.cttz.i32(i32 65280, i1 true) + ret i32 %r +} + +define zeroext i32 @func32iz() { +; CHECK-LABEL: func32iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i32 @llvm.cttz.i32(i32 65280, i1 true) + ret i32 %r +} + +define signext i16 @func16is() { +; CHECK-LABEL: func16is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.cttz.i16(i16 65280, i1 true) + ret i16 %r +} + +define zeroext i16 @func16iz() { +; CHECK-LABEL: func16iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 8, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i16 @llvm.cttz.i16(i16 65280, i1 true) + ret i16 %r +} + +define signext i8 @func8is() { +; CHECK-LABEL: func8is: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 4, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.cttz.i8(i8 240, i1 true) + ret i8 %r +} + +define zeroext i8 @func8iz() { +; CHECK-LABEL: func8iz: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 4, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + %r = tail call i8 @llvm.cttz.i8(i8 240, i1 true) + ret i8 %r +}