Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2636,6 +2636,23 @@ MB = 64 - countTrailingOnes(Imm64); SH = 0; + if (Val.getOpcode() == ISD::ANY_EXTEND) { + auto Op0 = Val.getOperand(0); + if ( Op0.getOpcode() == ISD::SRL && + isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { + + auto ResultType = Val.getNode()->getValueType(0); + auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, + ResultType); + SDValue IDVal (ImDef, 0); + + Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, + ResultType, IDVal, Op0.getOperand(0), + getI32Imm(1, dl)), 0); + SH = 64 - Imm; + } + } + // If the operand is a logical right shift, we can fold it into this // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) // for n <= mb. The right shift is really a left rotate followed by a Index: lib/Target/PowerPC/README.txt =================================================================== --- lib/Target/PowerPC/README.txt +++ lib/Target/PowerPC/README.txt @@ -658,3 +658,9 @@ ISA 2.07. LLVM needs to add infrastructure to recognize fusion opportunities and force instruction pairs to be scheduled together. 
+----------------------------------------------------------------------------- + +TODO: Handle any_extend and zero_extend more generally than the any_extend(srl) +fold performed when selecting rldicl in PPCISelDAGToDAG.cpp. +See https://reviews.llvm.org/D24924#555306 + Index: test/CodeGen/PowerPC/anyext_srl.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/anyext_srl.ll @@ -0,0 +1,29 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s + +%class.PB2 = type { [1 x i32], %class.PB1* } +%class.PB1 = type { [1 x i32], i64, i64, i32 } + +; Function Attrs: norecurse nounwind readonly +define zeroext i1 @foo(%class.PB2* %s_a, %class.PB2* %s_b) local_unnamed_addr { +entry: + %arrayidx.i6 = bitcast %class.PB2* %s_a to i32* + %0 = load i32, i32* %arrayidx.i6, align 8, !tbaa !1 + %and.i = and i32 %0, 8 + %cmp.i = icmp ne i32 %and.i, 0 + %arrayidx.i37 = bitcast %class.PB2* %s_b to i32* + %1 = load i32, i32* %arrayidx.i37, align 8, !tbaa !1 + %and.i4 = and i32 %1, 8 + %cmp.i5 = icmp ne i32 %and.i4, 0 + %cmp = xor i1 %cmp.i, %cmp.i5 + ret i1 %cmp +; CHECK-LABEL: @foo +; CHECK: rldicl {{[0-9]+}}, {{[0-9]+}}, 61, 63 + +} + +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C++ TBAA"} +