diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -373,6 +373,7 @@
   bool signExtendConstant(const ConstantInt *CI) const override;
   bool isCheapToSpeculateCttz(Type *Ty) const override;
   bool isCheapToSpeculateCtlz(Type *Ty) const override;
+  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
   bool hasAndNotCompare(SDValue Y) const override;
   bool hasBitTest(SDValue X, SDValue Y) const override;
   bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1165,6 +1165,22 @@
   return Subtarget.hasStdExtZbb();
 }
 
+bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
+    const Instruction &AndI) const {
+  // We expect to be able to match a bit extraction instruction if the Zbs
+  // extension is supported and the mask is a power of two. However, we
+  // conservatively return false if the mask would fit in an ANDI instruction,
+  // on the basis that it's possible the sinking+duplication of the AND in
+  // CodeGenPrepare triggered by this hook wouldn't decrease the instruction
+  // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
+  if (!Subtarget.hasStdExtZbs())
+    return false;
+  ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+  if (!Mask)
+    return false;
+  return !Mask->getValue().isSignedIntN(12) && Mask->getValue().isPowerOf2();
+}
+
 bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
   EVT VT = Y.getValueType();
diff --git a/llvm/test/Transforms/CodeGenPrepare/RISCV/and-mask-sink.ll b/llvm/test/Transforms/CodeGenPrepare/RISCV/and-mask-sink.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/RISCV/and-mask-sink.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare -mtriple=riscv32 %s \
+; RUN:   | FileCheck --check-prefixes=CHECK,NOZBS %s
+; RUN: opt -S -codegenprepare -mtriple=riscv32 -mattr=+zbs %s \
+; RUN:   | FileCheck --check-prefixes=CHECK,ZBS %s
+; RUN: opt -S -codegenprepare -mtriple=riscv64 %s \
+; RUN:   | FileCheck --check-prefixes=CHECK,NOZBS %s
+; RUN: opt -S -codegenprepare -mtriple=riscv64 -mattr=+zbs %s \
+; RUN:   | FileCheck --check-prefixes=CHECK,ZBS %s
+
+@A = global i32 zeroinitializer
+
+; And should be sunk when Zbs is present and the mask doesn't fit in ANDI's
+; immediate.
+define i32 @and_sink1(i32 %a, i1 %c) {
+; NOZBS-LABEL: @and_sink1(
+; NOZBS-NEXT:    [[AND:%.*]] = and i32 [[A:%.*]], 2048
+; NOZBS-NEXT:    br label [[BB0:%.*]]
+; NOZBS:       bb0:
+; NOZBS-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; NOZBS-NEXT:    store i32 0, i32* @A, align 4
+; NOZBS-NEXT:    br i1 [[CMP]], label [[BB0]], label [[BB2:%.*]]
+; NOZBS:       bb2:
+; NOZBS-NEXT:    ret i32 0
+;
+; ZBS-LABEL: @and_sink1(
+; ZBS-NEXT:    br label [[BB0:%.*]]
+; ZBS:       bb0:
+; ZBS-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], 2048
+; ZBS-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
+; ZBS-NEXT:    store i32 0, i32* @A, align 4
+; ZBS-NEXT:    br i1 [[CMP]], label [[BB0]], label [[BB2:%.*]]
+; ZBS:       bb2:
+; ZBS-NEXT:    ret i32 0
+;
+  %and = and i32 %a, 2048
+  br label %bb0
+bb0:
+  %cmp = icmp eq i32 %and, 0
+  store i32 0, i32* @A
+  br i1 %cmp, label %bb0, label %bb2
+bb2:
+  ret i32 0
+}
+
+; Don't sink when the mask has more than 1 bit set (not a bit-extract pattern).
+define i32 @and_sink2(i32 %a) {
+; CHECK-LABEL: @and_sink2(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[A:%.*]], 2049
+; CHECK-NEXT:    br label [[BB0:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT:    store i32 0, i32* @A, align 4
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB0]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i32 0
+;
+  %and = and i32 %a, 2049
+  br label %bb0
+bb0:
+  %cmp = icmp eq i32 %and, 0
+  store i32 0, i32* @A
+  br i1 %cmp, label %bb0, label %bb2
+bb2:
+  ret i32 0
+}
+
+; Don't sink when the mask fits in ANDI's 12-bit signed immediate (1024 does).
+define i32 @and_sink3(i32 %a) {
+; CHECK-LABEL: @and_sink3(
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[A:%.*]], 1024
+; CHECK-NEXT:    br label [[BB0:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT:    store i32 0, i32* @A, align 4
+; CHECK-NEXT:    br i1 [[CMP]], label [[BB0]], label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    ret i32 0
+;
+  %and = and i32 %a, 1024
+  br label %bb0
+bb0:
+  %cmp = icmp eq i32 %and, 0
+  store i32 0, i32* @A
+  br i1 %cmp, label %bb0, label %bb2
+bb2:
+  ret i32 0
+}