Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -731,14 +731,9 @@ // If this is a zext/sext of a load, return 0 if the corresponding // extending load exists on target. - if (I && isa(I->getOperand(0))) { - EVT ExtVT = EVT::getEVT(Dst); - EVT LoadVT = EVT::getEVT(Src); - unsigned LType = - ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); - if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) + if (auto LI = dyn_cast(I->getOperand(0))) + if (Src == LI->getType() && getTLI()->isExtLoad(LI, I, DL)) return 0; - } break; case Instruction::AddrSpaceCast: if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), Index: llvm/lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2048,6 +2048,11 @@ return AdjustCost(Entry->Cost); } + if (I && (ISD::SIGN_EXTEND == ISD || ISD::ZERO_EXTEND == ISD)) + if (auto LI = dyn_cast(I->getOperand(0))) + if (TLI->isExtLoad(LI, I, DL)) + return 0; + return AdjustCost(BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I)); } Index: llvm/test/Analysis/CostModel/ARM/cast.ll =================================================================== --- llvm/test/Analysis/CostModel/ARM/cast.ll +++ llvm/test/Analysis/CostModel/ARM/cast.ll @@ -396,6 +396,7 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'casts' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -782,6 +783,7 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2088 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2048 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'casts' ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -1168,6 +1170,7 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'casts' ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -1554,6 +1557,7 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'casts' ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -1940,6 +1944,7 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'casts' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -2326,6 +2331,7 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'casts' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -2712,6 +2718,7 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'casts' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -3098,6 +3105,7 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'casts' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r0 = sext i1 undef to i8 @@ -3484,6 +3492,7 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r247 = sitofp <16 x i16> undef to <16 x double> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r248 = uitofp <16 x i64> undef to <16 x double> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r249 = sitofp <16 x i64> undef to <16 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %r0 = sext i1 undef to i8 %r1 = zext i1 undef to i8 @@ -3943,8 +3952,8 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64> @@ -3953,8 +3962,8 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64> -; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32> @@ -3983,6 +3992,7 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'load_extends' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -4049,6 +4059,7 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_extends' ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -4115,6 +4126,7 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'load_extends' ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -4181,6 +4193,7 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'load_extends' ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -4207,8 +4220,8 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64> @@ -4217,8 +4230,8 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64> -; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32> @@ -4247,6 +4260,7 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'load_extends' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -4313,6 +4327,7 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'load_extends' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -4379,6 +4394,7 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'load_extends' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -4445,6 +4461,7 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'load_extends' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadi8 = load i8, i8* undef, align 1 @@ -4471,8 +4488,8 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r9 = zext i16 %loadi16 to i64 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r10 = sext i32 %loadi32 to i64 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r11 = zext i32 %loadi32 to i64 -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816s = sext <2 x i8> %loadv2i8 to <2 x i16> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2816u = zext <2 x i8> %loadv2i8 to <2 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832s = sext <2 x i8> %loadv2i8 to <2 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2832u = zext <2 x i8> %loadv2i8 to <2 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2864s = sext <2 x i8> %loadv2i8 to <2 x i64> @@ -4481,8 +4498,8 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4816u = zext <4 x i8> %loadv4i8 to <4 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832s = sext <4 x i8> %loadv4i8 to <4 x i32> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4832u = zext <4 x i8> %loadv4i8 to <4 x i32> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64> -; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864s = sext <4 x i8> %loadv4i8 to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4864u = zext <4 x i8> %loadv4i8 to <4 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816s = sext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8816u = zext <8 x i8> %loadv8i8 to <8 x i16> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8832s = sext <8 x i8> %loadv8i8 to <8 x i32> @@ -4511,6 +4528,7 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v23264u = zext <2 x i32> %loadv2i32 to <2 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264s = sext <4 x i32> %loadv4i32 to <4 x i64> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v43264u = zext <4 x i32> %loadv4i32 to <4 x i64> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %loadi8 = load i8, i8* undef %loadi16 = load i16, i16* undef @@ -4638,6 +4656,7 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'store_trunc' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8 @@ -4692,6 +4711,7 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'store_trunc' ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8 @@ -4746,6 +4766,7 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'store_trunc' ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8 @@ -4800,6 +4821,7 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'store_trunc' ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8 @@ -4854,6 +4876,7 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'store_trunc' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8 @@ -4908,6 +4931,7 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'store_trunc' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8 @@ -4962,6 +4986,7 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'store_trunc' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8 @@ -5016,6 +5041,7 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'store_trunc' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i816 = trunc i16 undef to i8 @@ -5070,6 +5096,7 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %v81664, <8 x i16>* undef, align 16 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i32> %v23264, <2 x i32>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %v43264, <4 x i32>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %i816 = trunc i16 undef to i8 %i832 = trunc i32 undef to i8 @@ -5156,6 +5183,7 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'load_fpextends' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 @@ -5181,6 +5209,7 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fpextends' ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 @@ -5206,6 +5235,7 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'load_fpextends' ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 @@ -5231,6 +5261,7 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'load_fpextends' ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 @@ -5256,6 +5287,7 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'load_fpextends' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 @@ -5281,6 +5313,7 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'load_fpextends' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 @@ -5306,6 +5339,7 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'load_fpextends' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 @@ -5331,6 +5365,7 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'load_fpextends' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %loadf16 = load half, half* undef, align 2 @@ -5356,6 +5391,7 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v9 = fpext <2 x float> %loadv2f32 to <2 x double> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v10 = fpext <4 x float> %loadv4f32 to <4 x double> ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v11 = fpext <8 x float> %loadv8f32 to <8 x double> +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %loadf16 = load half, half* undef %loadf32 = load float, float* undef @@ -5410,6 +5446,7 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'load_fptrunc' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half @@ -5434,6 +5471,7 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'load_fptrunc' ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half @@ -5458,6 +5496,7 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'load_fptrunc' ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half @@ -5482,6 +5521,7 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'load_fptrunc' ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half @@ -5506,6 +5546,7 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'load_fptrunc' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half @@ -5530,6 +5571,7 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'load_fptrunc' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half @@ -5554,6 +5596,7 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'load_fptrunc' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half @@ -5578,6 +5621,7 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'load_fptrunc' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %i1632 = fptrunc float undef to half @@ -5602,6 +5646,7 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <8 x half> %v81664, <8 x half>* undef, align 16 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <2 x float> %v23264, <2 x float>* undef, align 8 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x float> %v43264, <4 x float>* undef, align 16 +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %i1632 = fptrunc float undef to half %i1664 = fptrunc double undef to half @@ -5642,6 +5687,7 @@ ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-RECIP-LABEL: 'bitcasts' ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -5652,6 +5698,7 @@ ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-MVE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-RECIP-LABEL: 'bitcasts' ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -5662,6 +5709,7 @@ ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-MAIN-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-RECIP-LABEL: 'bitcasts' ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -5672,6 +5720,7 @@ ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-BASE-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-RECIP-LABEL: 'bitcasts' ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -5682,6 +5731,7 @@ ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8R-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-MVE-SIZE-LABEL: 'bitcasts' ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -5692,6 +5742,7 @@ ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-MVE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-MAIN-SIZE-LABEL: 'bitcasts' ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -5702,6 +5753,7 @@ ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-MAIN-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8M-BASE-SIZE-LABEL: 'bitcasts' ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -5712,6 +5764,7 @@ ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8M-BASE-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; ; CHECK-V8R-SIZE-LABEL: 'bitcasts' ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32 @@ -5722,6 +5775,7 @@ ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16 ; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half +; CHECK-V8R-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %a = bitcast i32 undef to i32 %b = bitcast float undef to float Index: llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll +++ llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll @@ -6,7 +6,7 @@ ; DEBUG-OUTPUT-NOT: .loc ; DEBUG-OUTPUT-NOT: {{.*}}.debug_info -; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 1) +; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 2, interleaved count: 1) ; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4) ; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized Index: llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll +++ llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll @@ -6,7 +6,7 @@ ; DEBUG-OUTPUT-NOT: .loc ; DEBUG-OUTPUT-NOT: {{.*}}.debug_info -; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 1) +; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 2, interleaved count: 1) ; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4) ; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized Index: llvm/test/Transforms/SLPVectorizer/X86/sext.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/X86/sext.ll +++ llvm/test/Transforms/SLPVectorizer/X86/sext.ll @@ -11,15 +11,26 @@ ; define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) { -; SSE-LABEL: @loadext_2i8_to_2i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 -; SSE-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 -; SSE-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 -; SSE-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 -; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE-NEXT: ret <2 x i64> [[V1]] +; SSE2-LABEL: @loadext_2i8_to_2i64( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 +; SSE2-NEXT: ret <2 x i64> [[V1]] +; +; SLM-LABEL: @loadext_2i8_to_2i64( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 +; SLM-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 +; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SLM-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 @@ -114,23 +125,42 @@ } define <4 x i64> @loadext_4i8_to_4i64(i8* %p0) { -; SSE-LABEL: @loadext_4i8_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 -; SSE-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 -; SSE-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 -; SSE-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 -; SSE-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 -; SSE-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 -; SSE-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 -; SSE-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64 -; SSE-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64 -; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE-NEXT: ret <4 x i64> [[V3]] +; SSE2-LABEL: @loadext_4i8_to_4i64( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>* +; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1 +; SSE2-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SSE2-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> +; SSE2-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64 +; SSE2-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64 +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 +; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SSE2-NEXT: ret <4 x i64> [[V3]] +; +; SLM-LABEL: @loadext_4i8_to_4i64( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i8, i8* [[P0]], i64 3 +; SLM-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i8, i8* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i8, i8* [[P3]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 +; SLM-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 +; SLM-NEXT: [[X2:%.*]] = sext i8 [[I2]] to i64 +; SLM-NEXT: [[X3:%.*]] = sext i8 [[I3]] to i64 +; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SLM-NEXT: ret <4 x i64> [[V3]] ; ; AVX-LABEL: @loadext_4i8_to_4i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1 @@ -665,15 +695,26 @@ ; define <2 x i64> @loadext_2i16_to_2i64(i16* %p0) { -; SSE-LABEL: @loadext_2i16_to_2i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 -; SSE-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 -; SSE-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64 -; SSE-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64 -; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE-NEXT: ret <2 x i64> [[V1]] +; SSE2-LABEL: @loadext_2i16_to_2i64( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 +; SSE2-NEXT: ret <2 x i64> [[V1]] +; +; SLM-LABEL: @loadext_2i16_to_2i64( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64 +; SLM-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64 +; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SLM-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i16_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 @@ -768,23 +809,42 @@ } define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) { -; SSE-LABEL: @loadext_4i16_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 -; SSE-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 -; SSE-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 -; SSE-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 -; SSE-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 -; SSE-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64 -; SSE-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64 -; SSE-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64 -; SSE-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64 -; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE-NEXT: ret <4 x i64> [[V3]] +; SSE2-LABEL: @loadext_4i16_to_4i64( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>* +; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1 +; SSE2-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 +; SSE2-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64> +; SSE2-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64 +; SSE2-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64 +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 +; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SSE2-NEXT: ret <4 x i64> [[V3]] +; +; SLM-LABEL: @loadext_4i16_to_4i64( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3 +; SLM-NEXT: [[I0:%.*]] = load i16, i16* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i16, i16* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i16, i16* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i16, i16* [[P3]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i16 [[I0]] to i64 +; SLM-NEXT: [[X1:%.*]] = sext i16 [[I1]] to i64 +; SLM-NEXT: [[X2:%.*]] = sext i16 [[I2]] to i64 +; SLM-NEXT: [[X3:%.*]] = sext i16 [[I3]] to i64 +; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SLM-NEXT: ret <4 x i64> [[V3]] ; ; AVX-LABEL: @loadext_4i16_to_4i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1 @@ -953,15 +1013,26 @@ ; define <2 x i64> @loadext_2i32_to_2i64(i32* %p0) { -; SSE-LABEL: @loadext_2i32_to_2i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 -; SSE-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 -; SSE-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64 -; SSE-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64 -; SSE-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE-NEXT: ret <2 x i64> [[V1]] +; SSE2-LABEL: @loadext_2i32_to_2i64( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[TMP5]], i32 1 +; SSE2-NEXT: ret <2 x i64> [[V1]] +; +; SLM-LABEL: @loadext_2i32_to_2i64( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 +; SLM-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64 +; SLM-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64 +; SLM-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SLM-NEXT: ret <2 x i64> [[V1]] ; ; AVX-LABEL: @loadext_2i32_to_2i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 @@ -985,23 +1056,42 @@ } define <4 x i64> @loadext_4i32_to_4i64(i32* %p0) { -; SSE-LABEL: @loadext_4i32_to_4i64( -; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 -; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 -; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 -; SSE-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 -; SSE-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 -; SSE-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1 -; SSE-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1 -; SSE-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64 -; SSE-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64 -; SSE-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64 -; SSE-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64 -; SSE-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 -; SSE-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 -; SSE-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 -; SSE-NEXT: ret <4 x i64> [[V3]] +; SSE2-LABEL: @loadext_4i32_to_4i64( +; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 +; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 +; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 +; SSE2-NEXT: [[TMP1:%.*]] = bitcast i32* [[P0]] to <2 x i32>* +; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 1 +; SSE2-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1 +; SSE2-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1 +; SSE2-NEXT: [[TMP3:%.*]] = sext <2 x i32> [[TMP2]] to <2 x i64> +; SSE2-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64 +; SSE2-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64 +; SSE2-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0 +; SSE2-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[TMP4]], i32 0 +; SSE2-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1 +; SSE2-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[TMP5]], i32 1 +; SSE2-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SSE2-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SSE2-NEXT: ret <4 x i64> [[V3]] +; +; SLM-LABEL: @loadext_4i32_to_4i64( +; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1 +; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 2 +; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i32, i32* [[P0]], i64 3 +; SLM-NEXT: [[I0:%.*]] = load i32, i32* [[P0]], align 1 +; SLM-NEXT: [[I1:%.*]] = load i32, i32* [[P1]], align 1 +; SLM-NEXT: [[I2:%.*]] = load i32, i32* [[P2]], align 1 +; SLM-NEXT: [[I3:%.*]] = load i32, i32* [[P3]], align 1 +; SLM-NEXT: [[X0:%.*]] = sext i32 [[I0]] to i64 +; SLM-NEXT: [[X1:%.*]] = sext i32 [[I1]] to i64 +; SLM-NEXT: [[X2:%.*]] = sext i32 [[I2]] to i64 +; SLM-NEXT: [[X3:%.*]] = sext i32 [[I3]] to i64 +; SLM-NEXT: [[V0:%.*]] = insertelement <4 x i64> undef, i64 [[X0]], i32 0 +; SLM-NEXT: [[V1:%.*]] = insertelement <4 x i64> [[V0]], i64 [[X1]], i32 1 +; SLM-NEXT: [[V2:%.*]] = insertelement <4 x i64> [[V1]], i64 [[X2]], i32 2 +; SLM-NEXT: [[V3:%.*]] = insertelement <4 x i64> [[V2]], i64 [[X3]], i32 3 +; SLM-NEXT: ret <4 x i64> [[V3]] ; ; AVX-LABEL: @loadext_4i32_to_4i64( ; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i32, i32* [[P0:%.*]], i64 1