diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8931,21 +8931,33 @@
   return UseMask(NewMask);
 }
 
-static void computeGREV(APInt &Src, unsigned ShAmt) {
+static void computeGREVOrGORC(APInt &Src, unsigned ShAmt, bool IsGORC,
+                              bool ComputeZeros = false) {
+  static const uint64_t GREVMasks[] = {
+      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
+      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+
   ShAmt &= Src.getBitWidth() - 1;
   uint64_t x = Src.getZExtValue();
-  if (ShAmt & 1)
-    x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
-  if (ShAmt & 2)
-    x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
-  if (ShAmt & 4)
-    x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
-  if (ShAmt & 8)
-    x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
-  if (ShAmt & 16)
-    x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
-  if (ShAmt & 32)
-    x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
+
+  // To compute zeros, invert the value first and invert the result back after.
+  if (ComputeZeros)
+    x = ~x;
+
+  for (unsigned Stage = 0; Stage != 6; ++Stage) {
+    unsigned Shift = 1 << Stage;
+    if (ShAmt & Shift) {
+      uint64_t Mask = GREVMasks[Stage];
+      uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
+      if (IsGORC)
+        Res |= x;
+      x = Res;
+    }
+  }
+
+  if (ComputeZeros)
+    x = ~x;
+
   Src = x;
 }
 
@@ -9011,12 +9023,15 @@
     Known.Zero.setBitsFrom(LowBits);
     break;
   }
-  case RISCVISD::GREV: {
+  case RISCVISD::GREV:
+  case RISCVISD::GORC: {
     if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
       unsigned ShAmt = C->getZExtValue();
-      computeGREV(Known.Zero, ShAmt);
-      computeGREV(Known.One, ShAmt);
+      bool IsGORC = Op.getOpcode() == RISCVISD::GORC;
+      computeGREVOrGORC(Known.Zero, ShAmt, IsGORC,
+                        /*ComputeZeros*/ true);
+      computeGREVOrGORC(Known.One, ShAmt, IsGORC);
     }
     break;
   }
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbb-intrinsic.ll
--- a/llvm/test/CodeGen/RISCV/rv32zbb-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-intrinsic.ll
@@ -12,3 +12,22 @@
   %tmp = call i32 @llvm.riscv.orc.b.i32(i32 %a)
   ret i32 %tmp
 }
+
+; The second and+or pair is redundant with the first; make sure we remove it.
+define i32 @orcb_knownbits(i32 %a) nounwind {
+; RV32ZBB-LABEL: orcb_knownbits:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    lui a1, 1044480
+; RV32ZBB-NEXT:    and a0, a0, a1
+; RV32ZBB-NEXT:    lui a1, 2048
+; RV32ZBB-NEXT:    addi a1, a1, 1
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    orc.b a0, a0
+; RV32ZBB-NEXT:    ret
+  %tmp = and i32 %a, 4278190080 ; 0xFF000000
+  %tmp2 = or i32 %tmp, 8388609 ; 0x800001
+  %tmp3 = call i32 @llvm.riscv.orc.b.i32(i32 %tmp2)
+  %tmp4 = and i32 %tmp3, 4278190080 ; 0xFF000000
+  %tmp5 = or i32 %tmp4, 16711935 ; 0xFF00FF
+  ret i32 %tmp5
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
--- a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
@@ -25,20 +25,46 @@
 ; RV64ZBB-LABEL: orcb32_zext:
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    orc.b a0, a0
-; RV64ZBB-NEXT:    slli a0, a0, 32
-; RV64ZBB-NEXT:    srli a0, a0, 32
 ; RV64ZBB-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: orcb32_zext:
 ; RV64ZBP:       # %bb.0:
 ; RV64ZBP-NEXT:    orc.b a0, a0
-; RV64ZBP-NEXT:    slli a0, a0, 32
-; RV64ZBP-NEXT:    srli a0, a0, 32
 ; RV64ZBP-NEXT:    ret
   %tmp = call i32 @llvm.riscv.orc.b.i32(i32 %a)
   ret i32 %tmp
 }
 
+; The second and+or pair is redundant with the first; make sure we remove it.
+define signext i32 @orcb32_knownbits(i32 signext %a) nounwind {
+; RV64ZBB-LABEL: orcb32_knownbits:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 1044480
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, 2048
+; RV64ZBB-NEXT:    addiw a1, a1, 1
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV64ZBP-LABEL: orcb32_knownbits:
+; RV64ZBP:       # %bb.0:
+; RV64ZBP-NEXT:    lui a1, 1044480
+; RV64ZBP-NEXT:    and a0, a0, a1
+; RV64ZBP-NEXT:    lui a1, 2048
+; RV64ZBP-NEXT:    addiw a1, a1, 1
+; RV64ZBP-NEXT:    or a0, a0, a1
+; RV64ZBP-NEXT:    gorciw a0, a0, 7
+; RV64ZBP-NEXT:    ret
+  %tmp = and i32 %a, 4278190080 ; 0xFF000000
+  %tmp2 = or i32 %tmp, 8388609 ; 0x800001
+  %tmp3 = call i32 @llvm.riscv.orc.b.i32(i32 %tmp2)
+  %tmp4 = and i32 %tmp3, 4278190080 ; 0xFF000000
+  %tmp5 = or i32 %tmp4, 16711935 ; 0xFF00FF
+  ret i32 %tmp5
+}
+
 declare i64 @llvm.riscv.orc.b.i64(i64)
 
 define i64 @orcb64(i64 %a) nounwind {
@@ -54,3 +80,40 @@
   %tmp = call i64 @llvm.riscv.orc.b.i64(i64 %a)
   ret i64 %tmp
 }
+
+; The second and+or pair is redundant with the first; make sure we remove it.
+define i64 @orcb64_knownbits(i64 %a) nounwind {
+; RV64ZBB-LABEL: orcb64_knownbits:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 65535
+; RV64ZBB-NEXT:    slli a1, a1, 12
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, 131073
+; RV64ZBB-NEXT:    slli a1, a1, 13
+; RV64ZBB-NEXT:    addi a1, a1, 1
+; RV64ZBB-NEXT:    slli a1, a1, 20
+; RV64ZBB-NEXT:    addi a1, a1, 8
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    orc.b a0, a0
+; RV64ZBB-NEXT:    ret
+;
+; RV64ZBP-LABEL: orcb64_knownbits:
+; RV64ZBP:       # %bb.0:
+; RV64ZBP-NEXT:    lui a1, 65535
+; RV64ZBP-NEXT:    slli a1, a1, 12
+; RV64ZBP-NEXT:    and a0, a0, a1
+; RV64ZBP-NEXT:    lui a1, 131073
+; RV64ZBP-NEXT:    slli a1, a1, 13
+; RV64ZBP-NEXT:    addi a1, a1, 1
+; RV64ZBP-NEXT:    slli a1, a1, 20
+; RV64ZBP-NEXT:    addi a1, a1, 8
+; RV64ZBP-NEXT:    or a0, a0, a1
+; RV64ZBP-NEXT:    orc.b a0, a0
+; RV64ZBP-NEXT:    ret
+  %tmp = and i64 %a, 1099494850560 ; 0x000000ffff000000
+  %tmp2 = or i64 %tmp, 4611721202800525320 ; 0x4000200000100008
+  %tmp3 = call i64 @llvm.riscv.orc.b.i64(i64 %tmp2)
+  %tmp4 = and i64 %tmp3, 1099494850560 ; 0x000000ffff000000
+  %tmp5 = or i64 %tmp4, 18374966855153418495 ; 0xff00ff0000ff00ff
+  ret i64 %tmp5
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
--- a/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
@@ -75,6 +75,15 @@
   ret i32 %tmp
 }
 
+define zeroext i32 @gorci32_zext(i32 zeroext %a) nounwind {
+; RV64ZBP-LABEL: gorci32_zext:
+; RV64ZBP:       # %bb.0:
+; RV64ZBP-NEXT:    orc.w a0, a0
+; RV64ZBP-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.gorc.i32(i32 %a, i32 31)
+  ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.shfl.i32(i32 %a, i32 %b)
 
 define signext i32 @shfl32(i32 signext %a, i32 signext %b) nounwind {
@@ -515,6 +524,21 @@
   ret i64 %tmp
 }
 
+; The second OR is redundant with the first. Make sure we remove it.
+define i64 @gorci64_knownbits(i64 %a) nounwind {
+; RV64ZBP-LABEL: gorci64_knownbits:
+; RV64ZBP:       # %bb.0:
+; RV64ZBP-NEXT:    lui a1, %hi(.LCPI54_0)
+; RV64ZBP-NEXT:    ld a1, %lo(.LCPI54_0)(a1)
+; RV64ZBP-NEXT:    or a0, a0, a1
+; RV64ZBP-NEXT:    orc32 a0, a0
+; RV64ZBP-NEXT:    ret
+  %tmp = or i64 %a, 72624976668147840 ; 0x102040810204080
+  %tmp2 = call i64 @llvm.riscv.gorc.i64(i64 %tmp, i64 32)
+  %tmp3 = or i64 %tmp2, 1234624599046636680 ; 0x1122448811224488
+  ret i64 %tmp3
+}
+
 define i64 @orchi64(i64 %a) nounwind {
 ; RV64ZBP-LABEL: orchi64:
 ; RV64ZBP:       # %bb.0:
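Reviewer note (illustration only, not part of the patch): the known-bits change works because GORC runs the same six butterfly stages as GREV but ORs the original value back in at each stage, so known-one bits can be pushed straight through the transform, while known-zero bits are pushed through on the complemented value (the ComputeZeros path above). The standalone C++ sketch below models that reasoning outside of LLVM; the helper name grevOrGorc and the main() scenario are made up for illustration, and the constants are taken from the orcb_knownbits test in rv32zbb-intrinsic.ll.

#include <cassert>
#include <cstdint>

// Standalone model of the GREV/GORC butterfly transform on a 64-bit value.
// When IsGORC is true, the original bits are ORed back in at every stage,
// which turns the bit permutation into an OR-combine.
static uint64_t grevOrGorc(uint64_t X, unsigned ShAmt, bool IsGORC) {
  static const uint64_t Masks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  for (unsigned Stage = 0; Stage != 6; ++Stage) {
    unsigned Shift = 1u << Stage;
    if (ShAmt & Shift) {
      uint64_t Mask = Masks[Stage];
      uint64_t Res = ((X & Mask) << Shift) | ((X >> Shift) & Mask);
      if (IsGORC)
        Res |= X;
      X = Res;
    }
  }
  return X;
}

int main() {
  // After (and %a, 0xFF000000) and (or ..., 0x800001) in the rv32zbb test,
  // bit 23 and bit 0 are known one, byte 3 is unknown, the rest is known zero.
  uint64_t KnownOne = 0x00800001ULL;
  uint64_t KnownZero = ~(0xFF000000ULL | KnownOne) & 0xFFFFFFFFULL; // 0x007FFFFE

  // orc.b is gorc with shift amount 7: an OR-combine within each byte.
  // Known ones go through the transform directly; known zeros go through on
  // the complemented value and are inverted back (the ComputeZeros path).
  uint64_t OneOut = grevOrGorc(KnownOne, 7, /*IsGORC=*/true);
  uint64_t ZeroOut =
      ~grevOrGorc(~KnownZero & 0xFFFFFFFFULL, 7, /*IsGORC=*/true) & 0xFFFFFFFFULL;

  // Bytes 2 and 0 become known 0xFF, byte 1 stays known 0x00, and byte 3 stays
  // unknown, which is exactly why the trailing and/or in the test fold away.
  assert(OneOut == 0x00FF00FFULL);
  assert(ZeroOut == 0x0000FF00ULL);
  return 0;
}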