Index: llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -899,6 +899,36 @@
     break;
   }
+  case Instruction::AShr: {
+    // If operand 0 of this right shift is the result of a left shift by the
+    // same amount, the pair is likely a sign extension, which is unnecessary
+    // if the demanded bits already lie within the original (pre-extension)
+    // operand. In that case, return the original operand instead.
+    Instruction *Op0 = dyn_cast<Instruction>(I->getOperand(0));
+    if (Op0 && Op0->getOpcode() == Instruction::Shl) {
+      const APInt *SRA;
+      const APInt *SLA;
+      if (match(I->getOperand(1), m_APInt(SRA)) &&
+          match(Op0->getOperand(1), m_APInt(SLA))) {
+        unsigned BitWidth = DemandedMask.getBitWidth();
+        uint32_t RightShiftAmt = SRA->getLimitedValue(BitWidth - 1);
+        uint32_t LeftShiftAmt = SLA->getLimitedValue(BitWidth - 1);
+
+        if (RightShiftAmt == LeftShiftAmt) {
+          // Check whether the demanded bits only need the low bits that are
+          // unaffected by the sign extension.
+          APInt ValMask(BitWidth, 0);
+          ValMask.setBits(0, BitWidth - RightShiftAmt - 1);
+          // If so, return the operand of the Shl, rendering the Shl/AShr
+          // pair dead, at least for this use.
+          if (DemandedMask.isSubsetOf(ValMask)) {
+            return Op0->getOperand(0);
+          }
+        }
+      }
+    }
+    break;
+  }
   default:
     // Compute the Known bits to simplify things downstream.
     computeKnownBits(I, Known, Depth, CxtI);
Index: llvm/test/Transforms/InstCombine/simplify-multiuse-demanded-bits-ashr.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/simplify-multiuse-demanded-bits-ashr.ll
@@ -0,0 +1,181 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Scenario : Only the lower bits of the result of a shl/ashr pair (a sign
+;            extension) are demanded by any of the uses.
+; Test     : When the upper bits of the sign-extend result aren't used, the
+;            sign-extend operation can be optimized away completely.
+
+define dso_local i64 @multi_use_sign_extend_not_affecting_demanded_bits(i64 %a, i32 %b, i16 signext %c, i32 %d) local_unnamed_addr addrspace(1) #0 {
+
+; CHECK-LABEL: @multi_use_sign_extend_not_affecting_demanded_bits(
+; CHECK: %0 = and i32 %b, 1
+; CHECK-NEXT: %[[COND:[a-z0-9.]+]] = icmp eq i32 %0, 0
+;; *** Optimized away ***
+; CHECK-NOT: %conv = shl i32 %d, 16
+; CHECK-NOT: %conv1 = ashr i32 %conv, 16
+;; ***
+; CHECK: br i1 %[[COND]], label %{{[a-z0-9.]+}}, label %{{[a-z0-9.]+}}
+
+; CHECK: if.then
+; CHECK-NEXT: %and = and i32 %d, 1023
+
+; CHECK: if.else
+; CHECK: %and5 = and i32 %d, 16320
+
+entry:
+  %rem = srem i32 %b, 2
+  %tobool = icmp ne i32 %rem, 0
+;; *** Target case: optimize away a multi-use sign-extension result when the
+;;     demanded bits lie within the non-extended value itself.
+  %conv = shl i32 %d, 16
+  %conv1 = ashr i32 %conv, 16
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %and = and i32 %conv1, 1023
+  %or = or i32 %and, %b
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %and5 = and i32 %conv1, 16320
+  %or8 = or i32 %and5, %b
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %d.addr.0 = phi i32 [ %or, %if.then ], [ %or8, %if.else ]
+  %conv12 = sext i32 %d.addr.0 to i64
+  %and13 = and i64 %a, %conv12
+  ret i64 %and13
+}
+
+
+; Scenario : There are multiple uses of the sign-extension result, and only
+;            one of them demands bits that depend on the sign extension.
+; Test     : In this case, the optimization can't take place; however, the
+;            instcombine pass will sink the sign extension next to its only
+;            interested use.
+
+define dso_local i64 @multi_use_sign_extend_affecting_demanded_bits_single_use(i64 %a, i32 %b, i16 signext %c, i32 %d) local_unnamed_addr addrspace(1) #0 {
+; CHECK-LABEL: @multi_use_sign_extend_affecting_demanded_bits_single_use(
+; CHECK: %0 = and i32 %b, 1
+; CHECK-NEXT: %[[COND:[a-z0-9.]+]] = icmp eq i32 %0, 0
+;; *** Not present here, but moved next to the single use of the sext result (if.else)
+; CHECK-NOT: %conv = shl i32 %d, 16
+; CHECK-NOT: %conv1 = ashr exact i32 %conv, 16
+; CHECK-NOT: %conv1 = ashr i32 %conv, 16
+;; ***
+; CHECK-NEXT: br i1 %[[COND]], label %{{[a-z0-9.]+}}, label %{{[a-z0-9.]+}}
+
+; CHECK: if.then
+; CHECK-NEXT: %and = and i32 %d, 1023
+
+; CHECK: if.else
+;; *** Sunk to the branch where the sign extension affects the demanded bits
+; CHECK: %[[conv:.*]] = shl i32 %d, 16
+; CHECK-NEXT: %[[conv1:.*]] = ashr exact i32 %[[conv]], 16
+;; ***
+; CHECK-NEXT: %and5 = and i32 %[[conv1]], 16320000
+
+entry:
+  %rem = srem i32 %b, 2
+  %tobool = icmp ne i32 %rem, 0
+  %conv = shl i32 %d, 16
+  %conv1 = ashr i32 %conv, 16
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %and = and i32 %conv1, 1023
+  %or = or i32 %and, %b
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %and5 = and i32 %conv1, 16320000
+  %or8 = or i32 %and5, %b
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %d.addr.0 = phi i32 [ %or, %if.then ], [ %or8, %if.else ]
+  %conv12 = sext i32 %d.addr.0 to i64
+  %and13 = and i64 %a, %conv12
+  ret i64 %and13
+}
+
+; Scenario : The operations resemble a sign extension but differ slightly:
+;            the shift amounts are not equal (shl by 16, ashr by 17).
+; Test     : The pair must not be optimized away; the transform only applies
+;            to a genuine sign extension, where the demanded low bits are
+;            identical between the operand and the result of the sign-extend
+;            operation.
+
+define dso_local i64 @multi_use_not_fitting_opt(i64 %a, i32 %b, i16 signext %c, i32 %d) local_unnamed_addr addrspace(1) #0 {
+; CHECK-LABEL: @multi_use_not_fitting_opt(
+; CHECK: %0 = and i32 %b, 1
+; CHECK-NEXT: %[[COND:[a-z0-9.]+]] = icmp eq i32 %0, 0
+;; *** Not optimized ***
+; CHECK-NEXT: %[[conv:.*]] = shl i32 %d, 16
+; CHECK-NEXT: %[[conv1:.*]] = ashr i32 %[[conv]], 17
+;; *********************
+; CHECK-NEXT: br i1 %[[COND]], label %{{[a-z0-9.]+}}, label %{{[a-z0-9.]+}}
+
+; CHECK: if.then
+; CHECK: %and = and i32 %[[conv1]], 1023
+
+; CHECK: if.else
+; CHECK: %and5 = and i32 %[[conv1]], 16320
+
+entry:
+  %rem = srem i32 %b, 2
+  %tobool = icmp ne i32 %rem, 0
+;; *** Different shift amounts; must not be optimized ***
+  %conv = shl i32 %d, 16
+  %conv1 = ashr i32 %conv, 17
+;; ****************************************************
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %and = and i32 %conv1, 1023
+  %or = or i32 %and, %b
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %and5 = and i32 %conv1, 16320
+  %or8 = or i32 %and5, %b
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %d.addr.0 = phi i32 [ %or, %if.then ], [ %or8, %if.else ]
+  %conv12 = sext i32 %d.addr.0 to i64
+  %and13 = and i64 %a, %conv12
+  ret i64 %and13
+}
+
+; Scenario : A single use of the sign-extension result whose demanded bits do
+;            not depend on the sign extension.
+; Test     : The existing demanded-bits logic already removes the shl/ashr
+;            pair in the single-use case.
+
+define dso_local i64 @single_use_already_optimized(i64 %a, i32 %b, i16 signext %c, i32 %d) local_unnamed_addr addrspace(1) #0 {
+; CHECK-LABEL: @single_use_already_optimized(
+; CHECK: %rem = srem i32 %b, 2
+; CHECK: %tobool.not = icmp eq i32 %rem, 0
+;; *** The single use is already optimized away when the demanded bits don't
+;;     depend on the sext
+; CHECK-NOT: shl i32 %d, 16
+; CHECK-NOT: ashr i32 %conv, 16
+;; ***
+; CHECK: br i1 %tobool.not, label %if.end, label %if.then
+
+; CHECK: if.then
+; CHECK: %and = and i32 %d, 1023
+
+entry:
+  %rem = srem i32 %b, 2
+  %tobool = icmp ne i32 %rem, 0
+  %conv = shl i32 %d, 16
+  %conv1 = ashr i32 %conv, 16
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %and = and i32 %conv1, 1023
+  %or = or i32 %and, %b
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  %d.addr.0 = phi i32 [ %rem, %entry ], [ %or, %if.then ]
+  %conv12 = sext i32 %d.addr.0 to i64
+  %and13 = and i64 %a, %conv12
+  ret i64 %and13
+}
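
For reviewers, a minimal standalone sketch of the mask logic in the AShr case (not part of the patch; the file name and printed output are illustrative only). It assumes nothing beyond LLVM's APInt from llvm/ADT/APInt.h: it mirrors the ValMask/isSubsetOf query for the i32, shift-by-16 shape the tests exercise, probing it with the demanded masks 1023 and 16320 (should fold) and 16320000 (should not fold).

    // valmask_sketch.cpp -- illustrative only, not part of this patch.
    // Mirrors the ValMask computation from the AShr case for i32 and a
    // shift amount of 16, then probes it with the demanded masks that the
    // tests above use.
    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned BitWidth = 32;
      const unsigned ShiftAmt = 16; // shl 16 / ashr 16, as in the tests

      // Bits that survive the shl/ashr round trip; like the patch, this is
      // conservative and also excludes the narrow value's sign bit (bit 15).
      llvm::APInt ValMask(BitWidth, 0);
      ValMask.setBits(0, BitWidth - ShiftAmt - 1); // sets bits [0, 15)

      const uint64_t Demanded[] = {1023, 16320, 16320000};
      for (uint64_t D : Demanded) {
        llvm::APInt DemandedMask(BitWidth, D);
        // isSubsetOf is the exact query the patch performs: true means the
        // shl/ashr pair is dead for a use that demands only these bits.
        std::printf("demanded %8llu -> fold: %d\n", (unsigned long long)D,
                    (int)DemandedMask.isSubsetOf(ValMask));
      }
      return 0; // expected output: fold = 1, 1, 0
    }

Note that setBits(0, BitWidth - RightShiftAmt) would also appear to be sound, since the narrow value's sign bit itself survives the shl/ashr round trip; the patch's tighter mask simply errs on the conservative side.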