Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -11015,10 +11015,11 @@ TargetLoweringBase::LegalizeTypeAction AArch64TargetLowering::getPreferredVectorAction(EVT VT) const { MVT SVT = VT.getSimpleVT(); - // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8, - // v4i16, v2i32 instead of to promote. - if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32 - || SVT == MVT::v1f32) + // During type legalization, we prefer to widen v1i8, v2i8, v4i8, v1i16, + // v2i16, v1i32, v1f32 to v8i8, v4i16, v2i32, v2f32 instead of to promote. + if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32 || + SVT == MVT::v1f32 || SVT == MVT::v2i8 || SVT == MVT::v4i8 || + SVT == MVT::v2i16) return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -97,7 +97,7 @@ bool NegativeImmediates = true; // Enable 64-bit vectorization in SLP. - unsigned MinVectorRegisterBitWidth = 64; + unsigned MinVectorRegisterBitWidth = 16; bool UseAA = false; bool PredictableSelectIsExpensive = false; Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -612,16 +612,6 @@ return LT.first * 2 * AmortizationCost; } - if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8) && - Ty->getVectorNumElements() < 8) { - // We scalarize the loads/stores because there is not v.4b register and we - // have to promote the elements to v.4h. - unsigned NumVecElts = Ty->getVectorNumElements(); - unsigned NumVectorizableInstsToAmortize = NumVecElts * 2; - // We generate 2 instructions per vector element. - return NumVectorizableInstsToAmortize * NumVecElts * 2; - } - return LT.first; } Index: llvm/test/Analysis/CostModel/AArch64/store.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/store.ll +++ llvm/test/Analysis/CostModel/AArch64/store.ll @@ -57,13 +57,13 @@ ; We scalarize the loads/stores because there is no vector register name for ; these types (they get extended to v.4h/v.2s). - ; CHECK: cost of 16 {{.*}} store + ; CHECK: cost of 1 {{.*}} store store <2 x i8> undef, <2 x i8> * undef - ; CHECK: cost of 64 {{.*}} store + ; CHECK: cost of 1 {{.*}} store store <4 x i8> undef, <4 x i8> * undef - ; CHECK: cost of 16 {{.*}} load + ; CHECK: cost of 1 {{.*}} load load <2 x i8> , <2 x i8> * undef - ; CHECK: cost of 64 {{.*}} load + ; CHECK: cost of 1 {{.*}} load load <4 x i8> , <4 x i8> * undef ret void Index: llvm/test/CodeGen/AArch64/aarch64-narrow-vectors.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/aarch64-narrow-vectors.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s + +; Check that vector widening (i.e., v4i8 -> v8i8) happens on narrow vectors that +; do not have a native vector instruction. 
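+;
+; As a rough illustration only (not an exact instruction sequence), a widened
+; <4 x i8> add is expected to lower along these lines, assuming the usual NEON
+; handling of a 32-bit vector load/store through an s-register:
+;   ldr   s0, [x1]              ; load the four i8 lanes as one 32-bit unit
+;   movi  v1.8b, #1             ; splat the constant operand
+;   add   v0.8b, v0.8b, v1.8b   ; the add executes on the widened v8i8 value
+;   str   s0, [x0]              ; store only the low 32 bits back
+; The constant operands in the functions below are plain splats of 1; any
+; non-zero splat exercises the same widening path.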
+
+; CHECK-LABEL: fun_v2i8:
+; CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+define void @fun_v2i8(i8* noalias nocapture %out, i8* noalias nocapture readonly %in) {
+entry:
+  %0 = bitcast i8* %in to <2 x i8>*
+  %1 = load <2 x i8>, <2 x i8>* %0, align 1
+  %2 = add <2 x i8> <i8 1, i8 1>, %1
+  %3 = bitcast i8* %out to <2 x i8>*
+  store <2 x i8> %2, <2 x i8>* %3, align 1
+  ret void
+}
+
+; CHECK-LABEL: fun_v4i8:
+; CHECK: add {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+define void @fun_v4i8(i8* noalias nocapture %out, i8* noalias nocapture readonly %in) {
+entry:
+  %0 = bitcast i8* %in to <4 x i8>*
+  %1 = load <4 x i8>, <4 x i8>* %0, align 1
+  %2 = add <4 x i8> <i8 1, i8 1, i8 1, i8 1>, %1
+  %3 = bitcast i8* %out to <4 x i8>*
+  store <4 x i8> %2, <4 x i8>* %3, align 1
+  ret void
+}
+
+; CHECK-LABEL: fun_v2i16:
+; CHECK: add {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+define void @fun_v2i16(i16* noalias nocapture %out, i16* noalias nocapture readonly %in) {
+entry:
+  %0 = bitcast i16* %in to <2 x i16>*
+  %1 = load <2 x i16>, <2 x i16>* %0, align 1
+  %2 = add <2 x i16> <i16 1, i16 1>, %1
+  %3 = bitcast i16* %out to <2 x i16>*
+  store <2 x i16> %2, <2 x i16>* %3, align 1
+  ret void
+}
Index: llvm/test/CodeGen/AArch64/arm64-rev.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -222,8 +222,7 @@
 define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
 ; CHECK-LABEL: test_vrev64:
 ; CHECK: ldr [[DEST:q[0-9]+]],
-; CHECK: st1.h
-; CHECK: st1.h
+; CHECK: st1.s
 entry:
   %0 = bitcast <4 x i16>* %source to <8 x i16>*
   %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
Index: llvm/test/CodeGen/AArch64/arm64-storebytesmerge.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64-storebytesmerge.ll
+++ llvm/test/CodeGen/AArch64/arm64-storebytesmerge.ll
@@ -3,13 +3,14 @@
 ;target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 ;target triple = "aarch64--linux-gnu"
-
 ; CHECK-LABEL: test
-; CHECK: str x30, [sp, #-16]!
-; CHECK: adrp x8, q
-; CHECK: ldr x8, [x8, :lo12:q]
-; CHECK: stp xzr, xzr, [x8]
-; CHECK: bl f
+; CHECK: str x30, [sp, #-16]!
+; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: adrp x8, q +; CHECK-NEXT: ldr x8, [x8, :lo12:q] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: bl f @q = external unnamed_addr global i16*, align 8 Index: llvm/test/CodeGen/AArch64/bitcast-v2i8.ll =================================================================== --- llvm/test/CodeGen/AArch64/bitcast-v2i8.ll +++ llvm/test/CodeGen/AArch64/bitcast-v2i8.ll @@ -4,10 +4,8 @@ define i16 @test_bitcast_v2i8_to_i16(<2 x i8> %a) { ; CHECK-LABEL: test_bitcast_v2i8_to_i16 -; CHECK: mov.s [[WREG_HI:w[0-9]+]], v0[1] -; CHECK-NEXT: fmov [[WREG_LO:w[0-9]+]], s0 -; CHECK-NEXT: strb [[WREG_HI]], [sp, #15] -; CHECK-NEXT: strb [[WREG_LO]], [sp, #14] +; CHECK: add x[[OP1:[0-9]+]], sp, #14 ; =14 +; CHECK-NEXT: st1.h { v0 }[0], [x[[OP1]]] ; CHECK-NEXT: ldrh w0, [sp, #14] %aa = bitcast <2 x i8> %a to i16 Index: llvm/test/CodeGen/AArch64/bitreverse.ll =================================================================== --- llvm/test/CodeGen/AArch64/bitreverse.ll +++ llvm/test/CodeGen/AArch64/bitreverse.ll @@ -6,13 +6,33 @@ define <2 x i16> @f(<2 x i16> %a) { ; CHECK-LABEL: f: -; CHECK: fmov [[REG1:w[0-9]+]], s0 -; CHECK-DAG: rbit [[REG2:w[0-9]+]], [[REG1]] -; CHECK-DAG: fmov s0, [[REG2]] -; CHECK-DAG: mov [[REG3:w[0-9]+]], v0.s[1] -; CHECK-DAG: rbit [[REG4:w[0-9]+]], [[REG3]] -; CHECK-DAG: mov v0.s[1], [[REG4]] -; CHECK-DAG: ushr v0.2s, v0.2s, #16 +; CHECK: rev16 v + +; FIXME: other than using vector rev16, the generated code looks ugly: +; rev16 v0.8b, v0.8b +; movi v1.8b, #15 +; movi v2.8b, #240 +; and v1.8b, v0.8b, v1.8b +; and v0.8b, v0.8b, v2.8b +; shl v1.8b, v1.8b, #4 +; ushr v0.8b, v0.8b, #4 +; movi v2.8b, #51 +; orr v0.8b, v0.8b, v1.8b +; movi v1.8b, #204 +; and v2.8b, v0.8b, v2.8b +; and v0.8b, v0.8b, v1.8b +; shl v2.8b, v2.8b, #2 +; ushr v0.8b, v0.8b, #2 +; movi v1.8b, #85 +; orr v0.8b, v0.8b, v2.8b +; movi v2.8b, #170 +; and v1.8b, v0.8b, v1.8b +; and v0.8b, v0.8b, v2.8b +; shl v1.8b, v1.8b, #1 +; ushr v0.8b, v0.8b, #1 +; orr v0.8b, v0.8b, v1.8b +; ret + %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a) ret <2 x i16> %b } Index: llvm/test/CodeGen/AArch64/complex-fp-to-int.ll =================================================================== --- llvm/test/CodeGen/AArch64/complex-fp-to-int.ll +++ llvm/test/CodeGen/AArch64/complex-fp-to-int.ll @@ -20,7 +20,7 @@ define <2 x i16> @test_v2f32_to_signed_v2i16(<2 x float> %in) { ; CHECK-LABEL: test_v2f32_to_signed_v2i16: -; CHECK: fcvtzs.2s v0, v0 +; CHECK: fcvtzs.4s v0, v0 %val = fptosi <2 x float> %in to <2 x i16> ret <2 x i16> %val @@ -28,7 +28,7 @@ define <2 x i16> @test_v2f32_to_unsigned_v2i16(<2 x float> %in) { ; CHECK-LABEL: test_v2f32_to_unsigned_v2i16: -; CHECK: fcvtzs.2s v0, v0 +; CHECK: fcvtzu.4s v0, v0 %val = fptoui <2 x float> %in to <2 x i16> ret <2 x i16> %val @@ -36,7 +36,8 @@ define <2 x i8> @test_v2f32_to_signed_v2i8(<2 x float> %in) { ; CHECK-LABEL: test_v2f32_to_signed_v2i8: -; CHECK: fcvtzs.2s v0, v0 +; CHECK: fcvtzs w +; CHECK: fcvtzs w %val = fptosi <2 x float> %in to <2 x i8> ret <2 x i8> %val @@ -44,7 +45,8 @@ define <2 x i8> @test_v2f32_to_unsigned_v2i8(<2 x float> %in) { ; CHECK-LABEL: test_v2f32_to_unsigned_v2i8: -; CHECK: fcvtzs.2s v0, v0 +; CHECK: fcvtzs w +; CHECK: fcvtzs w %val = fptoui <2 x float> %in to <2 x i8> ret <2 x i8> %val @@ -70,8 +72,10 @@ define <4 x i8> @test_v4f32_to_signed_v4i8(<4 x float> %in) { ; CHECK-LABEL: test_v4f32_to_signed_v4i8: -; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0 -; CHECK: xtn.4h v0, [[VAL64]] +; CHECK: 
fcvtzs w +; CHECK: fcvtzs w +; CHECK: fcvtzs w +; CHECK: fcvtzs w %val = fptosi <4 x float> %in to <4 x i8> ret <4 x i8> %val @@ -79,8 +83,10 @@ define <4 x i8> @test_v4f32_to_unsigned_v4i8(<4 x float> %in) { ; CHECK-LABEL: test_v4f32_to_unsigned_v4i8: -; CHECK: fcvtzs.4s [[VAL64:v[0-9]+]], v0 -; CHECK: xtn.4h v0, [[VAL64]] +; CHECK: fcvtzs w +; CHECK: fcvtzs w +; CHECK: fcvtzs w +; CHECK: fcvtzs w %val = fptoui <4 x float> %in to <4 x i8> ret <4 x i8> %val @@ -106,8 +112,8 @@ define <2 x i16> @test_v2f64_to_signed_v2i16(<2 x double> %in) { ; CHECK-LABEL: test_v2f64_to_signed_v2i16: -; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 -; CHECK: xtn.2s v0, [[VAL64]] +; CHECK: fcvtzs w +; CHECK: fcvtzs w %val = fptosi <2 x double> %in to <2 x i16> ret <2 x i16> %val @@ -115,8 +121,8 @@ define <2 x i16> @test_v2f64_to_unsigned_v2i16(<2 x double> %in) { ; CHECK-LABEL: test_v2f64_to_unsigned_v2i16: -; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 -; CHECK: xtn.2s v0, [[VAL64]] +; CHECK: fcvtzs w +; CHECK: fcvtzs w %val = fptoui <2 x double> %in to <2 x i16> ret <2 x i16> %val @@ -124,8 +130,8 @@ define <2 x i8> @test_v2f64_to_signed_v2i8(<2 x double> %in) { ; CHECK-LABEL: test_v2f64_to_signed_v2i8: -; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 -; CHECK: xtn.2s v0, [[VAL64]] +; CHECK: fcvtzs w +; CHECK: fcvtzs w %val = fptosi <2 x double> %in to <2 x i8> ret <2 x i8> %val @@ -133,8 +139,8 @@ define <2 x i8> @test_v2f64_to_unsigned_v2i8(<2 x double> %in) { ; CHECK-LABEL: test_v2f64_to_unsigned_v2i8: -; CHECK: fcvtzs.2d [[VAL64:v[0-9]+]], v0 -; CHECK: xtn.2s v0, [[VAL64]] +; CHECK: fcvtzs w +; CHECK: fcvtzs w %val = fptoui <2 x double> %in to <2 x i8> ret <2 x i8> %val Index: llvm/test/CodeGen/AArch64/complex-int-to-fp.ll =================================================================== --- llvm/test/CodeGen/AArch64/complex-int-to-fp.ll +++ llvm/test/CodeGen/AArch64/complex-int-to-fp.ll @@ -30,9 +30,11 @@ define <2 x double> @test_signed_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone { ; CHECK-LABEL: test_signed_v2i16_to_v2f64: -; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16 -; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16 -; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: smov.h [[OP1:w[0-9]+]], v0[0] +; CHECK: fmov s[[OP2:[0-9]+]], [[OP1]] +; CHECK: smov.h [[OP3:w[0-9]+]], v0[1] +; CHECK: mov.s v[[OP2]][1], [[OP3]] +; CHECK: sshll.2d [[VAL64:v[0-9]+]], v[[OP2]], #0 ; CHECK: scvtf.2d v0, [[VAL64]] %conv = sitofp <2 x i16> %v to <2 x double> @@ -40,9 +42,11 @@ } define <2 x double> @test_unsigned_v2i16_to_v2f64(<2 x i16> %v) nounwind readnone { ; CHECK-LABEL: test_unsigned_v2i16_to_v2f64 -; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff -; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] -; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: umov.h [[OP1:w[0-9]+]], v0[0] +; CHECK: fmov s[[OP2:[0-9]+]], [[OP1]] +; CHECK: umov.h [[OP3:w[0-9]+]], v0[1] +; CHECK: mov.s v[[OP2]][1], [[OP3]] +; CHECK: ushll.2d [[VAL64:v[0-9]+]], v[[OP2]], #0 ; CHECK: ucvtf.2d v0, [[VAL64]] %conv = uitofp <2 x i16> %v to <2 x double> @@ -51,21 +55,25 @@ define <2 x double> @test_signed_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone { ; CHECK-LABEL: test_signed_v2i8_to_v2f64: -; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24 -; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24 -; CHECK: sshll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: smov.b [[OP1:w[0-9]+]], v0[0] +; CHECK: fmov s[[OP2]], [[OP1]] +; CHECK: smov.b [[OP3:w[0-9]+]], v0[1] +; CHECK: mov.s v[[OP2]][1], [[OP3]] +; CHECK: sshll.2d [[VAL64:v[0-9]+]], v[[OP2]], #0 ; CHECK: scvtf.2d 
v0, [[VAL64]] %conv = sitofp <2 x i8> %v to <2 x double> ret <2 x double> %conv } + define <2 x double> @test_unsigned_v2i8_to_v2f64(<2 x i8> %v) nounwind readnone { ; CHECK-LABEL: test_unsigned_v2i8_to_v2f64 -; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff -; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] -; CHECK: ushll.2d [[VAL64:v[0-9]+]], [[VAL32]], #0 +; CHECK: umov.b [[OP1:w[0-9]+]], v0[0] +; CHECK: fmov s[[OP2]], [[OP1]] +; CHECK: umov.b [[OP3:w[0-9]+]], v0[1] +; CHECK: mov.s v[[OP2]][1], [[OP3]] +; CHECK: ushll.2d [[VAL64:v[0-9]+]], v[[OP2]], #0 ; CHECK: ucvtf.2d v0, [[VAL64]] - %conv = uitofp <2 x i8> %v to <2 x double> ret <2 x double> %conv } @@ -89,18 +97,16 @@ define <2 x float> @test_signed_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone { ; CHECK-LABEL: test_signed_v2i16_to_v2f32: -; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #16 -; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #16 -; CHECK: scvtf.2s v0, [[VAL32]] +; CHECK: sshll.4s [[VAL32:v[0-9]+]], v0, #0 +; CHECK: scvtf.4s v0, [[VAL32]] %conv = sitofp <2 x i16> %v to <2 x float> ret <2 x float> %conv } define <2 x float> @test_unsigned_v2i16_to_v2f32(<2 x i16> %v) nounwind readnone { ; CHECK-LABEL: test_unsigned_v2i16_to_v2f32 -; CHECK: movi d[[MASK:[0-9]+]], #0x00ffff0000ffff -; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] -; CHECK: ucvtf.2s v0, [[VAL32]] +; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0 +; CHECK: ucvtf.4s v0, [[VAL32]] %conv = uitofp <2 x i16> %v to <2 x float> ret <2 x float> %conv @@ -108,18 +114,22 @@ define <2 x float> @test_signed_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone { ; CHECK-LABEL: test_signed_v2i8_to_v2f32: -; CHECK: shl.2s [[TMP:v[0-9]+]], v0, #24 -; CHECK: sshr.2s [[VAL32:v[0-9]+]], [[TMP]], #24 -; CHECK: scvtf.2s v0, [[VAL32]] +; CHECK: smov.b [[OP1:w[0-9]+]], v0[0] +; CHECK: fmov s[[OP2:[0-9]+]], [[OP1]] +; CHECK: smov.b [[OP3:w[0-9]+]], v0[1] +; CHECK: mov.s v[[OP2]][1], [[OP3]] +; CHECK: scvtf.2s v0, v[[OP2]] %conv = sitofp <2 x i8> %v to <2 x float> ret <2 x float> %conv } define <2 x float> @test_unsigned_v2i8_to_v2f32(<2 x i8> %v) nounwind readnone { ; CHECK-LABEL: test_unsigned_v2i8_to_v2f32 -; CHECK: movi d[[MASK:[0-9]+]], #0x0000ff000000ff -; CHECK: and.8b [[VAL32:v[0-9]+]], v0, v[[MASK]] -; CHECK: ucvtf.2s v0, [[VAL32]] +; CHECK: umov.b [[OP1:w[0-9]+]], v0[0] +; CHECK: fmov s[[OP2:[0-9]+]], [[OP1]] +; CHECK: umov.b [[OP3:w[0-9]+]], v0[1] +; CHECK: mov.s v[[OP2]][1], [[OP3]] +; CHECK: ucvtf.2s v0, v[[OP2]] %conv = uitofp <2 x i8> %v to <2 x float> ret <2 x float> %conv @@ -145,19 +155,30 @@ define <4 x float> @test_signed_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone { ; CHECK-LABEL: test_signed_v4i8_to_v4f32: -; CHECK: shl.4h [[TMP:v[0-9]+]], v0, #8 -; CHECK: sshr.4h [[VAL16:v[0-9]+]], [[TMP]], #8 -; CHECK: sshll.4s [[VAL32:v[0-9]+]], [[VAL16]], #0 -; CHECK: scvtf.4s v0, [[VAL32]] +; CHECK: smov.b [[OP1:w[0-9]+]], v0[0] +; CHECK: smov.b [[OP2:w[0-9]+]], v0[1] +; CHECK: fmov s[[OP3:[0-9]+]], [[OP1]] +; CHECK: smov.b [[OP4:w[0-9]+]], v0[2] +; CHECK: mov.s v[[OP3]][1], [[OP2]] +; CHECK: mov.s v[[OP3]][2], [[OP4]] +; CHECK: smov.b [[OP5:w[0-9]+]], v0[3] +; CHECK: mov.s v[[OP3]][3], [[OP5]] +; CHECK: scvtf.4s v0, v[[OP3]] %conv = sitofp <4 x i8> %v to <4 x float> ret <4 x float> %conv } define <4 x float> @test_unsigned_v4i8_to_v4f32(<4 x i8> %v) nounwind readnone { ; CHECK-LABEL: test_unsigned_v4i8_to_v4f32 -; CHECK: bic.4h v0, #255, lsl #8 -; CHECK: ushll.4s [[VAL32:v[0-9]+]], v0, #0 -; CHECK: ucvtf.4s v0, [[VAL32]] +; CHECK: umov.b [[OP1:w[0-9]+]], v0[0] +; CHECK: umov.b [[OP2:w[0-9]+]], 
v0[1] +; CHECK: fmov s[[OP3:[0-9]+]], [[OP1]] +; CHECK: umov.b [[OP4:w[0-9]+]], v0[2] +; CHECK: mov.s v[[OP3]][1], [[OP2]] +; CHECK: mov.s v[[OP3]][2], [[OP4]] +; CHECK: umov.b [[OP5:w[0-9]+]], v0[3] +; CHECK: mov.s v[[OP3]][3], [[OP5]] +; CHECK: ucvtf.4s v0, v[[OP3]] %conv = uitofp <4 x i8> %v to <4 x float> ret <4 x float> %conv Index: llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -154,9 +154,8 @@ define <4 x half> @sitofp_i8(<4 x i8> %a) #0 { ; CHECK-COMMON-LABEL: sitofp_i8: -; CHECK-COMMON-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8 -; CHECK-COMMON-NEXT: sshr [[OP2:v[0-9]+\.4h]], [[OP1]], #8 -; CHECK-COMMON-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]], #0 +; CHECK-COMMON-NEXT: sshll [[OP2:v[0-9]+]].8h, v0.8b, #0 +; CHECK-COMMON-NEXT: sshll [[OP3:v[0-9]+\.4s]], [[OP2]].4h, #0 ; CHECK-COMMON-NEXT: scvtf [[OP4:v[0-9]+\.4s]], [[OP3]] ; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP4]] ; CHECK-COMMON-NEXT: ret @@ -200,8 +199,8 @@ define <4 x half> @uitofp_i8(<4 x i8> %a) #0 { ; CHECK-COMMON-LABEL: uitofp_i8: -; CHECK-COMMON-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-COMMON-NEXT: ushll [[OP1:v[0-9]+\.4s]], v0.4h, #0 +; CHECK-COMMON-NEXT: ushll [[OP0:v[0-9]+]].8h, v0.8b, #0 +; CHECK-COMMON-NEXT: ushll [[OP1:v[0-9]+\.4s]], [[OP0]].4h, #0 ; CHECK-COMMON-NEXT: ucvtf [[OP2:v[0-9]+\.4s]], [[OP1]] ; CHECK-COMMON-NEXT: fcvtn v0.4h, [[OP2]] ; CHECK-COMMON-NEXT: ret @@ -256,7 +255,10 @@ ; CHECK-COMMON-LABEL: fptosi_i8: ; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h ; CHECK-COMMON-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] -; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]] +; CHECK-COMMON-NEXT: xtn [[REG3:v[0-9]+]].4h, [[REG2]] +; CHECK-COMMON-NEXT: fcvtzs [[REG4:v[0-9]+\.4s]], [[REG3]].4s +; CHECK-COMMON-NEXT: xtn2 [[REG5:v[0-9]+\.8h]], [[REG4]] +; CHECK-COMMON-NEXT: xtn v0.8b, [[REG5]] ; CHECK-COMMON-NEXT: ret %1 = fptosi<4 x half> %a to <4 x i8> ret <4 x i8> %1 @@ -275,9 +277,11 @@ define <4 x i8> @fptoui_i8(<4 x half> %a) #0 { ; CHECK-COMMON-LABEL: fptoui_i8: ; CHECK-COMMON-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h -; NOTE: fcvtzs selected here because the xtn shaves the sign bit -; CHECK-COMMON-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] -; CHECK-COMMON-NEXT: xtn v0.4h, [[REG2]] +; CHECK-COMMON-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-COMMON-NEXT: xtn [[REG3:v[0-9]+]].4h, [[REG2]] +; CHECK-COMMON-NEXT: fcvtzu [[REG4:v[0-9]+\.4s]], [[REG3]].4s +; CHECK-COMMON-NEXT: xtn2 [[REG5:v[0-9]+\.8h]], [[REG4]] +; CHECK-COMMON-NEXT: xtn v0.8b, [[REG5]] ; CHECK-COMMON-NEXT: ret %1 = fptoui<4 x half> %a to <4 x i8> ret <4 x i8> %1 Index: llvm/test/CodeGen/AArch64/neon-perm.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-perm.ll +++ llvm/test/CodeGen/AArch64/neon-perm.ll @@ -1389,7 +1389,7 @@ define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) { ; CHECK-LABEL: test_vzip1_v4i8: -; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b +; CHECK: ret %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> ret <4 x i8> %lo } Index: llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll =================================================================== --- llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll +++ llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll @@ -33,7 +33,7 @@ ; Test a vector load IR and a sext/zext IR can be selected correctly. 
define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) { ; CHECK-LABEL: loadSExt.v4i8: -; CHECK: ldrsb +; CHECK: ldr {{s[0-9]+}}, [x0] %a = load <4 x i8>, <4 x i8>* %ref %conv = sext <4 x i8> %a to <4 x i32> ret <4 x i32> %conv @@ -41,7 +41,7 @@ define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) { ; CHECK-LABEL: loadZExt.v4i8: -; CHECK: ldrb +; CHECK: ldr {{s[0-9]+}}, [x0] %a = load <4 x i8>, <4 x i8>* %ref %conv = zext <4 x i8> %a to <4 x i32> ret <4 x i32> %conv @@ -49,7 +49,7 @@ define i32 @loadExt.i32(<4 x i8>* %ref) { ; CHECK-LABEL: loadExt.i32: -; CHECK: ldrb +; CHECK: ldr {{s[0-9]+}}, [x0] %a = load <4 x i8>, <4 x i8>* %ref %vecext = extractelement <4 x i8> %a, i32 0 %conv = zext i8 %vecext to i32 Index: llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll @@ -11,11 +11,10 @@ %pair = type { i8, i8 } ; CHECK-LABEL: test -; CHECK: Found an estimated cost of 20 for VF 2 For instruction: {{.*}} load i8 +; CHECK: Found an estimated cost of 16 for VF 2 For instruction: {{.*}} load i8 ; CHECK: Found an estimated cost of 0 for VF 2 For instruction: {{.*}} load i8 ; CHECK: vector.body -; CHECK: load i8 -; CHECK: load i8 +; CHECK: load <4 x i8> ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body define void @test(%pair* %p, i64 %n) { Index: llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll +++ llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll @@ -15,18 +15,18 @@ ; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: -; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[XTMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> -; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], undef -; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef -; DEFAULT-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef -; DEFAULT-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef -; DEFAULT-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], undef -; DEFAULT-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], undef -; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef +; DEFAULT-NEXT: [[XTMP20:%.*]] = add i32 [[XTMP17]], undef +; DEFAULT-NEXT: [[XTMP22:%.*]] = add i32 [[XTMP20]], undef +; DEFAULT-NEXT: [[XTMP24:%.*]] = add i32 [[XTMP22]], undef +; DEFAULT-NEXT: [[XTMP26:%.*]] = add i32 [[XTMP24]], undef +; DEFAULT-NEXT: [[XTMP28:%.*]] = add i32 [[XTMP26]], undef +; DEFAULT-NEXT: [[XTMP30:%.*]] = add i32 [[XTMP28]], undef +; DEFAULT-NEXT: [[XTMP32:%.*]] = add i32 [[XTMP30]], undef ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]]) -; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[TMP17]] -; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef +; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[XTMP17]] +; DEFAULT-NEXT: [[XTMP34:%.*]] = add i32 [[XTMP32]], undef ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR28330( @@ -35,7 +35,7 @@ ; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: 
for.body: -; GATHER-NEXT: [[TMPP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[XTMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 ; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0 ; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 @@ -54,19 +54,19 @@ ; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7 ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> , <8 x i32> ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0 -; GATHER-NEXT: [[TMPP20:%.*]] = add i32 [[TMPP17]], [[TMP19]] +; GATHER-NEXT: [[XTMP20:%.*]] = add i32 [[XTMP17]], [[TMP19]] ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1 -; GATHER-NEXT: [[TMPP22:%.*]] = add i32 [[TMPP20]], [[TMP20]] +; GATHER-NEXT: [[XTMP22:%.*]] = add i32 [[XTMP20]], [[TMP20]] ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2 -; GATHER-NEXT: [[TMPP24:%.*]] = add i32 [[TMPP22]], [[TMP21]] +; GATHER-NEXT: [[XTMP24:%.*]] = add i32 [[XTMP22]], [[TMP21]] ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3 -; GATHER-NEXT: [[TMPP26:%.*]] = add i32 [[TMPP24]], [[TMP22]] +; GATHER-NEXT: [[XTMP26:%.*]] = add i32 [[XTMP24]], [[TMP22]] ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4 -; GATHER-NEXT: [[TMPP28:%.*]] = add i32 [[TMPP26]], [[TMP23]] +; GATHER-NEXT: [[XTMP28:%.*]] = add i32 [[XTMP26]], [[TMP23]] ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5 -; GATHER-NEXT: [[TMPP30:%.*]] = add i32 [[TMPP28]], [[TMP24]] +; GATHER-NEXT: [[XTMP30:%.*]] = add i32 [[XTMP28]], [[TMP24]] ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6 -; GATHER-NEXT: [[TMPP32:%.*]] = add i32 [[TMPP30]], [[TMP25]] +; GATHER-NEXT: [[XTMP32:%.*]] = add i32 [[XTMP30]], [[TMP25]] ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1 ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2 @@ -77,86 +77,86 @@ ; GATHER-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP18]], i32 7 ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7 ; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP34]]) -; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[TMPP17]] -; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMPP32]], [[TMP33]] +; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], [[XTMP17]] +; GATHER-NEXT: [[XTMP34:%.*]] = add i32 [[XTMP32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR28330( ; MAX-COST-NEXT: entry: -; MAX-COST-NEXT: [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 -; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0 -; MAX-COST-NEXT: [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 -; MAX-COST-NEXT: [[TMP3:%.*]] = icmp eq i8 [[TMP2]], 0 -; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 -; MAX-COST-NEXT: [[TMP5:%.*]] = icmp eq i8 [[TMP4]], 0 -; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 -; MAX-COST-NEXT: 
[[TMP7:%.*]] = icmp eq i8 [[TMP6]], 0 -; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 -; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0 -; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 -; MAX-COST-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0 -; MAX-COST-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 -; MAX-COST-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0 -; MAX-COST-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 -; MAX-COST-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0 +; MAX-COST-NEXT: [[XTMP0:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 +; MAX-COST-NEXT: [[XTMP1:%.*]] = icmp eq i8 [[XTMP0]], 0 +; MAX-COST-NEXT: [[XTMP2:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 +; MAX-COST-NEXT: [[XTMP3:%.*]] = icmp eq i8 [[XTMP2]], 0 +; MAX-COST-NEXT: [[XTMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 +; MAX-COST-NEXT: [[XTMP5:%.*]] = icmp eq i8 [[XTMP4]], 0 +; MAX-COST-NEXT: [[XTMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 +; MAX-COST-NEXT: [[XTMP7:%.*]] = icmp eq i8 [[XTMP6]], 0 +; MAX-COST-NEXT: [[XTMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 +; MAX-COST-NEXT: [[XTMP9:%.*]] = icmp eq i8 [[XTMP8]], 0 +; MAX-COST-NEXT: [[XTMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 +; MAX-COST-NEXT: [[XTMP11:%.*]] = icmp eq i8 [[XTMP10]], 0 +; MAX-COST-NEXT: [[XTMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 +; MAX-COST-NEXT: [[XTMP13:%.*]] = icmp eq i8 [[XTMP12]], 0 +; MAX-COST-NEXT: [[XTMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 +; MAX-COST-NEXT: [[XTMP15:%.*]] = icmp eq i8 [[XTMP14]], 0 ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: -; MAX-COST-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP19:%.*]] = select i1 [[TMP1]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 [[TMP17]], [[TMP19]] -; MAX-COST-NEXT: [[TMP21:%.*]] = select i1 [[TMP3]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], [[TMP21]] -; MAX-COST-NEXT: [[TMP23:%.*]] = select i1 [[TMP5]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], [[TMP23]] -; MAX-COST-NEXT: [[TMP25:%.*]] = select i1 [[TMP7]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], [[TMP25]] -; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]] -; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]] -; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], [[TMP31]] -; MAX-COST-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP34]] = add i32 [[TMP32]], [[TMP33]] +; MAX-COST-NEXT: [[XTMP17:%.*]] = phi i32 [ [[XTMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; MAX-COST-NEXT: [[XTMP19:%.*]] = 
select i1 [[XTMP1]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[XTMP20:%.*]] = add i32 [[XTMP17]], [[XTMP19]] +; MAX-COST-NEXT: [[XTMP21:%.*]] = select i1 [[XTMP3]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[XTMP22:%.*]] = add i32 [[XTMP20]], [[XTMP21]] +; MAX-COST-NEXT: [[XTMP23:%.*]] = select i1 [[XTMP5]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[XTMP24:%.*]] = add i32 [[XTMP22]], [[XTMP23]] +; MAX-COST-NEXT: [[XTMP25:%.*]] = select i1 [[XTMP7]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[XTMP26:%.*]] = add i32 [[XTMP24]], [[XTMP25]] +; MAX-COST-NEXT: [[XTMP27:%.*]] = select i1 [[XTMP9]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[XTMP28:%.*]] = add i32 [[XTMP26]], [[XTMP27]] +; MAX-COST-NEXT: [[XTMP29:%.*]] = select i1 [[XTMP11]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[XTMP30:%.*]] = add i32 [[XTMP28]], [[XTMP29]] +; MAX-COST-NEXT: [[XTMP31:%.*]] = select i1 [[XTMP13]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[XTMP32:%.*]] = add i32 [[XTMP30]], [[XTMP31]] +; MAX-COST-NEXT: [[XTMP33:%.*]] = select i1 [[XTMP15]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[XTMP34]] = add i32 [[XTMP32]], [[XTMP33]] ; MAX-COST-NEXT: br label [[FOR_BODY]] ; entry: - %tmp0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 - %tmp1 = icmp eq i8 %tmp0, 0 - %tmp2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 - %tmp3 = icmp eq i8 %tmp2, 0 - %tmp4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 - %tmp5 = icmp eq i8 %tmp4, 0 - %tmp6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 - %tmp7 = icmp eq i8 %tmp6, 0 - %tmp8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 - %tmp9 = icmp eq i8 %tmp8, 0 - %tmp10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 - %tmp11 = icmp eq i8 %tmp10, 0 - %tmp12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 - %tmp13 = icmp eq i8 %tmp12, 0 - %tmp14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 - %tmp15 = icmp eq i8 %tmp14, 0 + %xtmp0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 + %xtmp1 = icmp eq i8 %xtmp0, 0 + %xtmp2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 + %xtmp3 = icmp eq i8 %xtmp2, 0 + %xtmp4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 + %xtmp5 = icmp eq i8 %xtmp4, 0 + %xtmp6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 + %xtmp7 = icmp eq i8 %xtmp6, 0 + %xtmp8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 + %xtmp9 = icmp eq i8 %xtmp8, 0 + %xtmp10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 + %xtmp11 = icmp eq i8 %xtmp10, 0 + %xtmp12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 + %xtmp13 = icmp eq i8 %xtmp12, 0 + %xtmp14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 + %xtmp15 = icmp eq i8 %xtmp14, 0 br label %for.body for.body: - %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ] - %tmp19 = select i1 %tmp1, i32 -720, i32 -80 - %tmp20 = add i32 %tmp17, %tmp19 - %tmp21 = select i1 %tmp3, i32 -720, i32 -80 - %tmp22 = add i32 %tmp20, %tmp21 - %tmp23 = select i1 %tmp5, i32 -720, i32 -80 - %tmp24 = add i32 %tmp22, %tmp23 - %tmp25 = select i1 %tmp7, i32 -720, 
i32 -80 - %tmp26 = add i32 %tmp24, %tmp25 - %tmp27 = select i1 %tmp9, i32 -720, i32 -80 - %tmp28 = add i32 %tmp26, %tmp27 - %tmp29 = select i1 %tmp11, i32 -720, i32 -80 - %tmp30 = add i32 %tmp28, %tmp29 - %tmp31 = select i1 %tmp13, i32 -720, i32 -80 - %tmp32 = add i32 %tmp30, %tmp31 - %tmp33 = select i1 %tmp15, i32 -720, i32 -80 - %tmp34 = add i32 %tmp32, %tmp33 + %xtmp17 = phi i32 [ %xtmp34, %for.body ], [ 0, %entry ] + %xtmp19 = select i1 %xtmp1, i32 -720, i32 -80 + %xtmp20 = add i32 %xtmp17, %xtmp19 + %xtmp21 = select i1 %xtmp3, i32 -720, i32 -80 + %xtmp22 = add i32 %xtmp20, %xtmp21 + %xtmp23 = select i1 %xtmp5, i32 -720, i32 -80 + %xtmp24 = add i32 %xtmp22, %xtmp23 + %xtmp25 = select i1 %xtmp7, i32 -720, i32 -80 + %xtmp26 = add i32 %xtmp24, %xtmp25 + %xtmp27 = select i1 %xtmp9, i32 -720, i32 -80 + %xtmp28 = add i32 %xtmp26, %xtmp27 + %xtmp29 = select i1 %xtmp11, i32 -720, i32 -80 + %xtmp30 = add i32 %xtmp28, %xtmp29 + %xtmp31 = select i1 %xtmp13, i32 -720, i32 -80 + %xtmp32 = add i32 %xtmp30, %xtmp31 + %xtmp33 = select i1 %xtmp15, i32 -720, i32 -80 + %xtmp34 = add i32 %xtmp32, %xtmp33 br label %for.body } @@ -167,18 +167,18 @@ ; DEFAULT-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; DEFAULT-NEXT: br label [[FOR_BODY:%.*]] ; DEFAULT: for.body: -; DEFAULT-NEXT: [[TMP17:%.*]] = phi i32 [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; DEFAULT-NEXT: [[X17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; DEFAULT-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> , <8 x i32> -; DEFAULT-NEXT: [[TMP20:%.*]] = add i32 -5, undef -; DEFAULT-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef -; DEFAULT-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef -; DEFAULT-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef -; DEFAULT-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], undef -; DEFAULT-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], undef -; DEFAULT-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], undef +; DEFAULT-NEXT: [[X20:%.*]] = add i32 -5, undef +; DEFAULT-NEXT: [[X22:%.*]] = add i32 [[X20]], undef +; DEFAULT-NEXT: [[X24:%.*]] = add i32 [[X22]], undef +; DEFAULT-NEXT: [[X26:%.*]] = add i32 [[X24]], undef +; DEFAULT-NEXT: [[X28:%.*]] = add i32 [[X26]], undef +; DEFAULT-NEXT: [[X30:%.*]] = add i32 [[X28]], undef +; DEFAULT-NEXT: [[X32:%.*]] = add i32 [[X30]], undef ; DEFAULT-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP2]]) -; DEFAULT-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], -5 -; DEFAULT-NEXT: [[TMP34:%.*]] = add i32 [[TMP32]], undef +; DEFAULT-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], -5 +; DEFAULT-NEXT: [[X34:%.*]] = add i32 [[X32]], undef ; DEFAULT-NEXT: br label [[FOR_BODY]] ; ; GATHER-LABEL: @PR32038( @@ -187,7 +187,7 @@ ; GATHER-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[TMP0]], zeroinitializer ; GATHER-NEXT: br label [[FOR_BODY:%.*]] ; GATHER: for.body: -; GATHER-NEXT: [[TMP17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; GATHER-NEXT: [[X17:%.*]] = phi i32 [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0 ; GATHER-NEXT: [[TMP3:%.*]] = insertelement <8 x i1> undef, i1 [[TMP2]], i32 0 ; GATHER-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1 @@ -206,19 +206,19 @@ ; GATHER-NEXT: [[TMP17:%.*]] = insertelement <8 x i1> [[TMP15]], i1 [[TMP16]], i32 7 ; GATHER-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i32> , <8 x i32> ; GATHER-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP18]], i32 0 -; 
GATHER-NEXT: [[TMPP20:%.*]] = add i32 -5, [[TMP19]] +; GATHER-NEXT: [[X20:%.*]] = add i32 -5, [[TMP19]] ; GATHER-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP18]], i32 1 -; GATHER-NEXT: [[TMPP22:%.*]] = add i32 [[TMPP20]], [[TMP20]] +; GATHER-NEXT: [[X22:%.*]] = add i32 [[X20]], [[TMP20]] ; GATHER-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP18]], i32 2 -; GATHER-NEXT: [[TMPP24:%.*]] = add i32 [[TMPP22]], [[TMP21]] +; GATHER-NEXT: [[X24:%.*]] = add i32 [[X22]], [[TMP21]] ; GATHER-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP18]], i32 3 -; GATHER-NEXT: [[TMPP26:%.*]] = add i32 [[TMPP24]], [[TMP22]] +; GATHER-NEXT: [[X26:%.*]] = add i32 [[X24]], [[TMP22]] ; GATHER-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP18]], i32 4 -; GATHER-NEXT: [[TMPP28:%.*]] = add i32 [[TMPP26]], [[TMP23]] +; GATHER-NEXT: [[X28:%.*]] = add i32 [[X26]], [[TMP23]] ; GATHER-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP18]], i32 5 -; GATHER-NEXT: [[TMPP30:%.*]] = add i32 [[TMPP28]], [[TMP24]] +; GATHER-NEXT: [[X30:%.*]] = add i32 [[X28]], [[TMP24]] ; GATHER-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP18]], i32 6 -; GATHER-NEXT: [[TMPP32:%.*]] = add i32 [[TMPP30]], [[TMP25]] +; GATHER-NEXT: [[X32:%.*]] = add i32 [[X30]], [[TMP25]] ; GATHER-NEXT: [[TMP26:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0 ; GATHER-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> [[TMP26]], i32 [[TMP20]], i32 1 ; GATHER-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP21]], i32 2 @@ -230,89 +230,87 @@ ; GATHER-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP33]], i32 7 ; GATHER-NEXT: [[TMP35:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v8i32(<8 x i32> [[TMP34]]) ; GATHER-NEXT: [[OP_EXTRA]] = add i32 [[TMP35]], -5 -; GATHER-NEXT: [[TMP34:%.*]] = add i32 [[TMPP32]], [[TMP33]] +; GATHER-NEXT: [[X34:%.*]] = add i32 [[X32]], [[TMP33]] ; GATHER-NEXT: br label [[FOR_BODY]] ; ; MAX-COST-LABEL: @PR32038( ; MAX-COST-NEXT: entry: -; MAX-COST-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <2 x i8>*), align 1 -; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[TMP0]], zeroinitializer -; MAX-COST-NEXT: [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 -; MAX-COST-NEXT: [[TMPP5:%.*]] = icmp eq i8 [[TMP4]], 0 -; MAX-COST-NEXT: [[TMP6:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 -; MAX-COST-NEXT: [[TMPP7:%.*]] = icmp eq i8 [[TMP6]], 0 -; MAX-COST-NEXT: [[TMP8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 -; MAX-COST-NEXT: [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0 -; MAX-COST-NEXT: [[TMP10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 -; MAX-COST-NEXT: [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0 -; MAX-COST-NEXT: [[TMP12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 -; MAX-COST-NEXT: [[TMP13:%.*]] = icmp eq i8 [[TMP12]], 0 -; MAX-COST-NEXT: [[TMP14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 -; MAX-COST-NEXT: [[TMP15:%.*]] = icmp eq i8 [[TMP14]], 0 +; MAX-COST-NEXT: [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* bitcast (i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1) to <4 x i8>*), align 1 +; MAX-COST-NEXT: [[TMP1:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer +; MAX-COST-NEXT: 
[[X8:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 +; MAX-COST-NEXT: [[X9:%.*]] = icmp eq i8 [[X8]], 0 +; MAX-COST-NEXT: [[X10:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 +; MAX-COST-NEXT: [[X11:%.*]] = icmp eq i8 [[X10]], 0 +; MAX-COST-NEXT: [[X12:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 +; MAX-COST-NEXT: [[X13:%.*]] = icmp eq i8 [[X12]], 0 +; MAX-COST-NEXT: [[X14:%.*]] = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 +; MAX-COST-NEXT: [[X15:%.*]] = icmp eq i8 [[X14]], 0 ; MAX-COST-NEXT: br label [[FOR_BODY:%.*]] ; MAX-COST: for.body: -; MAX-COST-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 +; MAX-COST-NEXT: [[X17:%.*]] = phi i32 [ [[X34:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; MAX-COST-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 ; MAX-COST-NEXT: [[TMP3:%.*]] = insertelement <4 x i1> undef, i1 [[TMP2]], i32 0 -; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1 +; MAX-COST-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1 ; MAX-COST-NEXT: [[TMP5:%.*]] = insertelement <4 x i1> [[TMP3]], i1 [[TMP4]], i32 1 -; MAX-COST-NEXT: [[TMP6:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[TMPP5]], i32 2 -; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMPP7]], i32 3 -; MAX-COST-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> , <4 x i32> -; MAX-COST-NEXT: [[TMP20:%.*]] = add i32 -5, undef -; MAX-COST-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], undef -; MAX-COST-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], undef -; MAX-COST-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], undef -; MAX-COST-NEXT: [[TMP27:%.*]] = select i1 [[TMP9]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], [[TMP27]] -; MAX-COST-NEXT: [[TMP29:%.*]] = select i1 [[TMP11]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP9:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP8]]) -; MAX-COST-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], [[TMP27]] -; MAX-COST-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[TMP29]] -; MAX-COST-NEXT: [[BIN_EXTRA:%.*]] = add i32 [[TMP11]], -5 -; MAX-COST-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP29]] -; MAX-COST-NEXT: [[TMP31:%.*]] = select i1 [[TMP13]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP32:%.*]] = add i32 [[BIN_EXTRA]], [[TMP31]] -; MAX-COST-NEXT: [[TMP33:%.*]] = select i1 [[TMP15]], i32 -720, i32 -80 -; MAX-COST-NEXT: [[TMP34]] = add i32 [[TMP32]], [[TMP33]] +; MAX-COST-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2 +; MAX-COST-NEXT: [[TMP7:%.*]] = insertelement <4 x i1> [[TMP5]], i1 [[TMP6]], i32 2 +; MAX-COST-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3 +; MAX-COST-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> [[TMP7]], i1 [[TMP8]], i32 3 +; MAX-COST-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> , <4 x i32> +; MAX-COST-NEXT: [[X20:%.*]] = add i32 -5, undef +; MAX-COST-NEXT: [[X22:%.*]] = add i32 [[X20]], undef +; MAX-COST-NEXT: [[X24:%.*]] = add i32 [[X22]], undef +; MAX-COST-NEXT: [[X26:%.*]] = add i32 [[X24]], undef +; MAX-COST-NEXT: [[X27:%.*]] = select i1 [[X9]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[X28:%.*]] = add i32 [[X26]], [[X27]] +; MAX-COST-NEXT: [[X29:%.*]] = select i1 [[X11]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[TMP11:%.*]] = call i32 
@llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP10]]) +; MAX-COST-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], [[X27]] +; MAX-COST-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[X29]] +; MAX-COST-NEXT: [[OP_EXTRA:%.*]] = add i32 [[TMP13]], -5 +; MAX-COST-NEXT: [[X30:%.*]] = add i32 [[X28]], [[X29]] +; MAX-COST-NEXT: [[X31:%.*]] = select i1 [[X13]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[X32:%.*]] = add i32 [[OP_EXTRA]], [[X31]] +; MAX-COST-NEXT: [[X33:%.*]] = select i1 [[X15]], i32 -720, i32 -80 +; MAX-COST-NEXT: [[X34]] = add i32 [[X32]], [[X33]] ; MAX-COST-NEXT: br label [[FOR_BODY]] ; entry: - %tmp0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 - %tmp1 = icmp eq i8 %tmp0, 0 - %tmp2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 - %tmp3 = icmp eq i8 %tmp2, 0 - %tmp4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 - %tmp5 = icmp eq i8 %tmp4, 0 - %tmp6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 - %tmp7 = icmp eq i8 %tmp6, 0 - %tmp8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 - %tmp9 = icmp eq i8 %tmp8, 0 - %tmp10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 - %tmp11 = icmp eq i8 %tmp10, 0 - %tmp12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 - %tmp13 = icmp eq i8 %tmp12, 0 - %tmp14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 - %tmp15 = icmp eq i8 %tmp14, 0 + %x0 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 1), align 1 + %x1 = icmp eq i8 %x0, 0 + %x2 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 2), align 2 + %x3 = icmp eq i8 %x2, 0 + %x4 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 3), align 1 + %x5 = icmp eq i8 %x4, 0 + %x6 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 4), align 4 + %x7 = icmp eq i8 %x6, 0 + %x8 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 5), align 1 + %x9 = icmp eq i8 %x8, 0 + %x10 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 6), align 2 + %x11 = icmp eq i8 %x10, 0 + %x12 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 7), align 1 + %x13 = icmp eq i8 %x12, 0 + %x14 = load i8, i8* getelementptr inbounds ([80 x i8], [80 x i8]* @a, i64 0, i64 8), align 8 + %x15 = icmp eq i8 %x14, 0 br label %for.body for.body: - %tmp17 = phi i32 [ %tmp34, %for.body ], [ 0, %entry ] - %tmp19 = select i1 %tmp1, i32 -720, i32 -80 - %tmp20 = add i32 -5, %tmp19 - %tmp21 = select i1 %tmp3, i32 -720, i32 -80 - %tmp22 = add i32 %tmp20, %tmp21 - %tmp23 = select i1 %tmp5, i32 -720, i32 -80 - %tmp24 = add i32 %tmp22, %tmp23 - %tmp25 = select i1 %tmp7, i32 -720, i32 -80 - %tmp26 = add i32 %tmp24, %tmp25 - %tmp27 = select i1 %tmp9, i32 -720, i32 -80 - %tmp28 = add i32 %tmp26, %tmp27 - %tmp29 = select i1 %tmp11, i32 -720, i32 -80 - %tmp30 = add i32 %tmp28, %tmp29 - %tmp31 = select i1 %tmp13, i32 -720, i32 -80 - %tmp32 = add i32 %tmp30, %tmp31 - %tmp33 = select i1 %tmp15, i32 -720, i32 -80 - %tmp34 = add i32 %tmp32, %tmp33 + %x17 = phi i32 [ %x34, %for.body ], [ 0, %entry ] + %x19 = select i1 %x1, i32 -720, i32 -80 + %x20 = add i32 -5, %x19 + %x21 = select i1 %x3, i32 -720, i32 -80 + %x22 = add i32 %x20, %x21 + %x23 = select i1 %x5, i32 -720, i32 -80 + 
%x24 = add i32 %x22, %x23 + %x25 = select i1 %x7, i32 -720, i32 -80 + %x26 = add i32 %x24, %x25 + %x27 = select i1 %x9, i32 -720, i32 -80 + %x28 = add i32 %x26, %x27 + %x29 = select i1 %x11, i32 -720, i32 -80 + %x30 = add i32 %x28, %x29 + %x31 = select i1 %x13, i32 -720, i32 -80 + %x32 = add i32 %x30, %x31 + %x33 = select i1 %x15, i32 -720, i32 -80 + %x34 = add i32 %x32, %x33 br label %for.body }