Index: include/llvm/Target/TargetSelectionDAG.td =================================================================== --- include/llvm/Target/TargetSelectionDAG.td +++ include/llvm/Target/TargetSelectionDAG.td @@ -137,9 +137,12 @@ def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0> ]>; -def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz +def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // bitreverse SDTCisSameAs<0, 1>, SDTCisInt<0> ]>; +def SDTIntBitCountUnaryOp : SDTypeProfile<1, 1, [ // ctlz, cttz + SDTCisInt<0>, SDTCisInt<1> +]>; def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>, SDTCisSameNumEltsAs<0, 1> ]>; @@ -404,11 +407,11 @@ def abs : SDNode<"ISD::ABS" , SDTIntUnaryOp>; def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; -def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; -def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>; -def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>; -def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>; -def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>; +def ctlz : SDNode<"ISD::CTLZ" , SDTIntBitCountUnaryOp>; +def cttz : SDNode<"ISD::CTTZ" , SDTIntBitCountUnaryOp>; +def ctpop : SDNode<"ISD::CTPOP" , SDTIntBitCountUnaryOp>; +def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; +def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntBitCountUnaryOp>; def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -813,7 +813,7 @@ >; } def : GCNPat < - (i16 (add (i16 (trunc (getDivergentFrag.ret i32:$popcnt))), i16:$val)), + (i16 (add (i16 (trunc (i32 (getDivergentFrag.ret i32:$popcnt)))), i16:$val)), (V_BCNT_U32_B32_e64 $popcnt, $val) >; Index: lib/Target/Hexagon/HexagonPatterns.td =================================================================== --- lib/Target/Hexagon/HexagonPatterns.td +++ lib/Target/Hexagon/HexagonPatterns.td @@ -1677,19 +1677,19 @@ // // Count leading zeros. -def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; // Count trailing zeros. -def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>; +def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; // Count leading ones. -def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>; +def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; // Count trailing ones. -def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; // Define leading/trailing patterns that require zero-extensions to 64 bits. Index: lib/Target/NVPTX/NVPTXInstrInfo.td =================================================================== --- lib/Target/NVPTX/NVPTXInstrInfo.td +++ lib/Target/NVPTX/NVPTXInstrInfo.td @@ -2908,7 +2908,7 @@ // ctz instruction always returns a 32-bit value. For ctlz.i64, convert the // ptx value to 64 bits to match the ISD node's semantics, unless we know we're // truncating back down to 32 bits. -def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; +def : Pat<(i64 (ctlz Int64Regs:$a)), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>; def : Pat<(i32 (trunc (ctlz Int64Regs:$a))), (CLZr64 Int64Regs:$a)>; // For 16-bit ctlz, we zero-extend to 32-bit, perform the count, then trunc the @@ -2925,10 +2925,10 @@ // and then ctlz that value. This way we don't have to subtract 16 from the // result. Unfortunately today we don't have a way to generate // "mov b32reg, {b16imm, b16reg}", so we don't do this optimization. -def : Pat<(ctlz Int16Regs:$a), +def : Pat<(i16 (ctlz Int16Regs:$a)), (SUBi16ri (CVT_u16_u32 (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE), 16)>; -def : Pat<(i32 (zext (ctlz Int16Regs:$a))), +def : Pat<(i32 (zext (i16 (ctlz Int16Regs:$a)))), (SUBi32ri (CLZr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), 16)>; // Population count @@ -2953,7 +2953,7 @@ // If we know that we're storing into an i32, we can avoid the final trunc. def : Pat<(ctpop Int16Regs:$a), (CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>; -def : Pat<(i32 (zext (ctpop Int16Regs:$a))), +def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))), (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>; // fpround f32 -> f16 Index: lib/Target/Sparc/SparcInstr64Bit.td =================================================================== --- lib/Target/Sparc/SparcInstr64Bit.td +++ lib/Target/Sparc/SparcInstr64Bit.td @@ -177,7 +177,7 @@ def : Pat<(SPcmpicc i64:$a, i64:$b), (CMPrr $a, $b)>; def : Pat<(SPcmpicc i64:$a, (i64 simm13:$b)), (CMPri $a, (as_i32imm $b))>; -def : Pat<(ctpop i64:$src), (POPCrr $src)>; +def : Pat<(i64 (ctpop i64:$src)), (POPCrr $src)>; } // Predicates = [Is64Bit] Index: lib/Target/Sparc/SparcInstrInfo.td =================================================================== --- lib/Target/Sparc/SparcInstrInfo.td +++ lib/Target/Sparc/SparcInstrInfo.td @@ -516,9 +516,9 @@ defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>, Requires<[HasV9, HasHardQuad]>; -let DecoderMethod = "DecodeLoadCP" in - defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>; -let DecoderMethod = "DecodeLoadCPPair" in +let DecoderMethod = "DecodeLoadCP" in + defm LDC : Load<"ld", 0b110000, load, CoprocRegs, i32>; +let DecoderMethod = "DecodeLoadCPPair" in defm LDDC : Load<"ldd", 0b110011, load, CoprocPair, v2i32, IIC_ldd>; let DecoderMethod = "DecodeLoadCP", Defs = [CPSR] in { @@ -1508,7 +1508,7 @@ def POPCrr : F3_1<2, 0b101110, (outs IntRegs:$rd), (ins IntRegs:$rs2), "popc $rs2, $rd", []>, Requires<[HasV9]>; -def : Pat<(ctpop i32:$src), +def : Pat<(i32 (ctpop i32:$src)), (POPCrr (SRLri $src, 0))>; let Predicates = [HasV9], hasSideEffects = 1, rd = 0, rs1 = 0b01111 in Index: lib/Target/SystemZ/SystemZInstrInfo.td =================================================================== --- lib/Target/SystemZ/SystemZInstrInfo.td +++ lib/Target/SystemZ/SystemZInstrInfo.td @@ -2082,7 +2082,7 @@ // cleared. We only use the first result here. let Defs = [CC] in def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>; -def : Pat<(ctlz GR64:$src), +def : Pat<(i64 (ctlz GR64:$src)), (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; // Population count. Counts bits set per byte or doubleword. Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -10617,13 +10617,13 @@ defm rr : AVX512_maskable, EVEX, AVX5128IBase, + (_.VT (OpNode (_.VT _.RC:$src1)))>, EVEX, AVX5128IBase, Sched<[sched]>; defm rm : AVX512_maskable, + (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } @@ -10636,8 +10636,8 @@ (ins _.ScalarMemOp:$src1), OpcodeStr, "${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr, - (_.VT (OpNode (X86VBroadcast - (_.ScalarLdFrag addr:$src1))))>, + (_.VT (OpNode (_.VT (X86VBroadcast + (_.ScalarLdFrag addr:$src1)))))>, EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, Sched<[sched.Folded]>; } @@ -10721,7 +10721,7 @@ multiclass avx512_unary_lowering { let Predicates = [prd, NoVLX] in { - def : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), + def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1))), (EXTRACT_SUBREG (!cast(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), @@ -10729,7 +10729,7 @@ _.info256.SubRegIdx)), _.info256.SubRegIdx)>; - def : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), + def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1))), (EXTRACT_SUBREG (!cast(InstrStr # "Zrr") (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),