Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -13633,6 +13633,27 @@
     Known.One &= Mask;
     return;
   }
+  case ARMISD::VGETLANEs:
+  case ARMISD::VGETLANEu: {
+    const SDValue &SrcSV = Op.getOperand(0);
+    Known = DAG.computeKnownBits(SrcSV, Depth + 1);
+    EVT VT = Op.getValueType();
+    EVT VecVT = SrcSV.getValueType();
+    const unsigned DstSz = VT.getScalarSizeInBits();
+    const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
+
+    assert(SrcSz == Known.getBitWidth());
+    if (DstSz > SrcSz) {
+      if (Op.getOpcode() == ARMISD::VGETLANEs)
+        Known = Known.sext(DstSz);
+      else {
+        Known = Known.zext(DstSz);
+        Known.Zero.setBitsFrom(SrcSz);
+      }
+    }
+    assert(DstSz == Known.getBitWidth());
+    break;
+  }
   }
 }
 
Index: test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
===================================================================
--- test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
+++ test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -21,17 +21,16 @@
 ; CHECK-LABEL: g:
 define float @g(<4 x i16>* nocapture %in) {
-  ; CHECK: vldr
+; CHECK: vldr d16, [r0]
+  ; FIXME: We should be able to extend without
+  ; having to generate cross-domain copies.
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f32.u32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
   %1 = load <4 x i16>, <4 x i16>* %in
-
-  ; For now we're generating a vmov.16 and a uxth instruction.
-  ; The uxth is redundant, and we should be able to extend without
-  ; having to generate cross-domain copies. Once we can do this
-  ; we should modify the checks below.
-
-  ; CHECK: uxth
   %2 = extractelement <4 x i16> %1, i32 0
-  ; CHECK: vcvt.f32.u32
   %3 = uitofp i16 %2 to float
   ret float %3
 }
 
@@ -60,3 +59,47 @@
   %13 = insertelement <4 x i32> %10, i32 %12, i32 3
   ret <4 x i32> %13
 }
+
+define float @i(<4 x i16>* nocapture %in) {
+; CHECK-LABEL: i:
+; CHECK: vldr d16, [r0]
+; CHECK-NEXT: vmov.u16 r0, d16[0]
+; CHECK-NEXT: sxth r0, r0
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+  %1 = load <4 x i16>, <4 x i16>* %in
+  %2 = extractelement <4 x i16> %1, i32 0
+  %3 = sitofp i16 %2 to float
+  ret float %3
+}
+
+; CHECK-LABEL: j:
+define float @j(<8 x i8>* nocapture %in) {
+; CHECK: vldr d16, [r0]
+; CHECK-NEXT: vmov.u8 r0, d16[7]
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f32.u32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+  %1 = load <8 x i8>, <8 x i8>* %in
+  %2 = extractelement <8 x i8> %1, i32 7
+  %3 = uitofp i8 %2 to float
+  ret float %3
+}
+
+; CHECK-LABEL: k:
+define float @k(<8 x i8>* nocapture %in) {
+; CHECK: vldr d16, [r0]
+; CHECK-NEXT: vmov.u8 r0, d16[7]
+; CHECK-NEXT: sxtb r0, r0
+; CHECK-NEXT: vmov s0, r0
+; CHECK-NEXT: vcvt.f32.s32 s0, s0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: bx lr
+  %1 = load <8 x i8>, <8 x i8>* %in
+  %2 = extractelement <8 x i8> %1, i32 7
+  %3 = sitofp i8 %2 to float
+  ret float %3
+}