Index: llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp =================================================================== --- llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp +++ llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp @@ -82,6 +82,7 @@ int getSizeOf(const Value *Val) const; int getSizeOf(const Type *Ty) const; + int getAllocSizeOf(const Type *Ty) const; int getTypeAlignment(Type *Ty) const; VectorType *getByteVectorTy(int ScLen) const; @@ -424,7 +425,8 @@ int ElemCount = VecTy->getElementCount().getFixedValue(); return HVC.getFullValue(HVC.getBoolTy(ElemCount)); } - return HVC.getFullValue(HVC.getBoolTy()); + // For scalars, return a vector <1 x i1>. + return HVC.getFullValue(HVC.getBoolTy(1)); } auto AlignVectors::getPassThrough(Value *Val) const -> Value * { @@ -443,8 +445,8 @@ auto *PtrTy = cast(Ptr->getType()); if (!PtrTy->isOpaque()) { Type *ElemTy = PtrTy->getElementType(); - int ElemSize = HVC.getSizeOf(ElemTy); - if (Adjust % ElemSize == 0) { + int ElemSize = HVC.getAllocSizeOf(ElemTy); + if (Adjust % ElemSize == 0 && Adjust != 0) { Value *Tmp0 = Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize)); return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo()); @@ -717,7 +719,7 @@ // Maximum alignment present in the whole address group. const AddrInfo &WithMaxAlign = - getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; }); + getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; }); Align MaxGiven = WithMaxAlign.HaveAlign; // Minimum alignment present in the move address group. @@ -979,6 +981,10 @@ return DL.getTypeStoreSize(const_cast(Ty)).getFixedValue(); } +auto HexagonVectorCombine::getAllocSizeOf(const Type *Ty) const -> int { + return DL.getTypeAllocSize(const_cast(Ty)).getFixedValue(); +} + auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int { // The actual type may be shorter than the HVX vector, so determine // the alignment based on subtarget info. 
@@ -1177,12 +1183,15 @@ int ToCount = (FromCount * FromSize) / ToSize; assert((FromCount * FromSize) % ToSize == 0); + auto *FromITy = IntegerType::get(F.getContext(), FromSize * 8); + auto *ToITy = IntegerType::get(F.getContext(), ToSize * 8); + // Mask -> sext to -> bitcast to -> // -> trunc to . Value *Ext = Builder.CreateSExt( - Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false)); + Mask, VectorType::get(FromITy, FromCount, /*Scalable*/ false)); Value *Cast = Builder.CreateBitCast( - Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false)); + Ext, VectorType::get(ToITy, ToCount, /*Scalable*/ false)); return Builder.CreateTrunc( Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false)); } @@ -1326,7 +1335,7 @@ return None; Builder B(Gep0->getParent()); - int Scale = DL.getTypeStoreSize(Gep0->getSourceElementType()); + int Scale = getAllocSizeOf(Gep0->getSourceElementType()); // FIXME: for now only check GEPs with a single index. if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2) Index: llvm/test/CodeGen/Hexagon/autohvx/vector-align-addr.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Hexagon/autohvx/vector-align-addr.ll @@ -0,0 +1,240 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -hexagon-vc -march=hexagon -hexagon-hvx-widen=32 -S < %s | FileCheck %s + +; Test that the Hexagon Vector Combine pass computes the address +; correctly when loading objects that contain extra padding +; between successive objects. 
+ +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define dllexport void @test(i8* %0) local_unnamed_addr #0 { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 0, 3040 +; CHECK-NEXT: [[TMP2:%.*]] = load i8, i8* undef, align 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <19 x i8> undef, i8 [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <19 x i8> [[TMP3]], <19 x i8> undef, <19 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP0:%.*]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <19 x i8>* +; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i32 0, 5472 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i32 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <19 x i8>* +; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint <19 x i8>* [[TMP6]] to i32 +; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], -128 +; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP11]] to <19 x i8>* +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint <19 x i8>* [[TMP6]] to i32 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <19 x i8>* [[TMP12]] to i8* +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[TMP14]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[TMP15]] to <128 x i8>* +; CHECK-NEXT: [[TMP17:%.*]] = load <128 x i8>, <128 x i8>* [[TMP16]], align 128 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 4 +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <19 x i8>* [[TMP18]] to <128 x i8>* +; CHECK-NEXT: [[TMP20:%.*]] = load <128 x i8>, <128 x i8>* [[TMP19]], align 128 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 8 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <19 x i8>* [[TMP21]] to <128 x i8>* +; CHECK-NEXT: [[TMP23:%.*]] = load <128 x i8>, <128 x i8>* [[TMP22]], align 128 +; 
CHECK-NEXT: [[TMP24:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 12 +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <19 x i8>* [[TMP24]] to <128 x i8>* +; CHECK-NEXT: [[TMP26:%.*]] = load <128 x i8>, <128 x i8>* [[TMP25]], align 128 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 16 +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <19 x i8>* [[TMP27]] to <128 x i8>* +; CHECK-NEXT: [[TMP29:%.*]] = load <128 x i8>, <128 x i8>* [[TMP28]], align 128 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 20 +; CHECK-NEXT: [[TMP31:%.*]] = bitcast <19 x i8>* [[TMP30]] to <128 x i8>* +; CHECK-NEXT: [[TMP32:%.*]] = load <128 x i8>, <128 x i8>* [[TMP31]], align 128 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 24 +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <19 x i8>* [[TMP33]] to <128 x i8>* +; CHECK-NEXT: [[TMP35:%.*]] = load <128 x i8>, <128 x i8>* [[TMP34]], align 128 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 28 +; CHECK-NEXT: [[TMP37:%.*]] = bitcast <19 x i8>* [[TMP36]] to <128 x i8>* +; CHECK-NEXT: [[TMP38:%.*]] = load <128 x i8>, <128 x i8>* [[TMP37]], align 128 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 32 +; CHECK-NEXT: [[TMP40:%.*]] = bitcast <19 x i8>* [[TMP39]] to <128 x i8>* +; CHECK-NEXT: [[TMP41:%.*]] = load <128 x i8>, <128 x i8>* [[TMP40]], align 128 +; CHECK-NEXT: [[TMP42:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 36 +; CHECK-NEXT: [[TMP43:%.*]] = bitcast <19 x i8>* [[TMP42]] to <128 x i8>* +; CHECK-NEXT: [[TMP44:%.*]] = load <128 x i8>, <128 x i8>* [[TMP43]], align 128 +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 40 +; CHECK-NEXT: [[TMP46:%.*]] = bitcast <19 x i8>* [[TMP45]] to <128 x i8>* +; CHECK-NEXT: [[TMP47:%.*]] = load <128 x i8>, <128 x i8>* [[TMP46]], align 128 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr <19 x i8>, <19 x i8>* 
[[TMP12]], i32 44 +; CHECK-NEXT: [[TMP49:%.*]] = bitcast <19 x i8>* [[TMP48]] to <128 x i8>* +; CHECK-NEXT: [[TMP50:%.*]] = load <128 x i8>, <128 x i8>* [[TMP49]], align 128 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 48 +; CHECK-NEXT: [[TMP52:%.*]] = bitcast <19 x i8>* [[TMP51]] to <128 x i8>* +; CHECK-NEXT: [[TMP53:%.*]] = load <128 x i8>, <128 x i8>* [[TMP52]], align 128 +; CHECK-NEXT: [[TMP54:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 52 +; CHECK-NEXT: [[TMP55:%.*]] = bitcast <19 x i8>* [[TMP54]] to <128 x i8>* +; CHECK-NEXT: [[TMP56:%.*]] = load <128 x i8>, <128 x i8>* [[TMP55]], align 128 +; CHECK-NEXT: [[TMP57:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 56 +; CHECK-NEXT: [[TMP58:%.*]] = bitcast <19 x i8>* [[TMP57]] to <128 x i8>* +; CHECK-NEXT: [[TMP59:%.*]] = load <128 x i8>, <128 x i8>* [[TMP58]], align 128 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 60 +; CHECK-NEXT: [[TMP61:%.*]] = bitcast <19 x i8>* [[TMP60]] to <128 x i8>* +; CHECK-NEXT: [[TMP62:%.*]] = load <128 x i8>, <128 x i8>* [[TMP61]], align 128 +; CHECK-NEXT: [[TMP63:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 64 +; CHECK-NEXT: [[TMP64:%.*]] = bitcast <19 x i8>* [[TMP63]] to <128 x i8>* +; CHECK-NEXT: [[TMP65:%.*]] = load <128 x i8>, <128 x i8>* [[TMP64]], align 128 +; CHECK-NEXT: [[TMP66:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 68 +; CHECK-NEXT: [[TMP67:%.*]] = bitcast <19 x i8>* [[TMP66]] to <128 x i8>* +; CHECK-NEXT: [[TMP68:%.*]] = load <128 x i8>, <128 x i8>* [[TMP67]], align 128 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 72 +; CHECK-NEXT: [[TMP70:%.*]] = bitcast <19 x i8>* [[TMP69]] to <128 x i8>* +; CHECK-NEXT: [[TMP71:%.*]] = load <128 x i8>, <128 x i8>* [[TMP70]], align 128 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 76 +; CHECK-NEXT: [[TMP73:%.*]] = bitcast <19 x i8>* 
[[TMP72]] to <128 x i8>* +; CHECK-NEXT: [[TMP74:%.*]] = load <128 x i8>, <128 x i8>* [[TMP73]], align 128 +; CHECK-NEXT: [[TMP75:%.*]] = getelementptr <19 x i8>, <19 x i8>* [[TMP12]], i32 80 +; CHECK-NEXT: [[TMP76:%.*]] = bitcast <19 x i8>* [[TMP75]] to <128 x i8>* +; CHECK-NEXT: [[TMP77:%.*]] = load <128 x i8>, <128 x i8>* [[TMP76]], align 128 +; CHECK-NEXT: [[TMP78:%.*]] = bitcast <128 x i8> [[TMP20]] to <32 x i32> +; CHECK-NEXT: [[TMP79:%.*]] = bitcast <128 x i8> [[TMP17]] to <32 x i32> +; CHECK-NEXT: [[TMP80:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP78]], <32 x i32> [[TMP79]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP81:%.*]] = bitcast <32 x i32> [[TMP80]] to <128 x i8> +; CHECK-NEXT: [[TMP82:%.*]] = bitcast <128 x i8> [[TMP23]] to <32 x i32> +; CHECK-NEXT: [[TMP83:%.*]] = bitcast <128 x i8> [[TMP20]] to <32 x i32> +; CHECK-NEXT: [[TMP84:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP82]], <32 x i32> [[TMP83]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP85:%.*]] = bitcast <32 x i32> [[TMP84]] to <128 x i8> +; CHECK-NEXT: [[TMP86:%.*]] = bitcast <128 x i8> [[TMP26]] to <32 x i32> +; CHECK-NEXT: [[TMP87:%.*]] = bitcast <128 x i8> [[TMP23]] to <32 x i32> +; CHECK-NEXT: [[TMP88:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP86]], <32 x i32> [[TMP87]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP89:%.*]] = bitcast <32 x i32> [[TMP88]] to <128 x i8> +; CHECK-NEXT: [[TMP90:%.*]] = bitcast <128 x i8> [[TMP29]] to <32 x i32> +; CHECK-NEXT: [[TMP91:%.*]] = bitcast <128 x i8> [[TMP26]] to <32 x i32> +; CHECK-NEXT: [[TMP92:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP90]], <32 x i32> [[TMP91]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP93:%.*]] = bitcast <32 x i32> [[TMP92]] to <128 x i8> +; CHECK-NEXT: [[TMP94:%.*]] = bitcast <128 x i8> [[TMP32]] to <32 x i32> +; CHECK-NEXT: [[TMP95:%.*]] = bitcast <128 x i8> [[TMP29]] to <32 x i32> +; CHECK-NEXT: [[TMP96:%.*]] = call <32 x i32> 
@llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP94]], <32 x i32> [[TMP95]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP97:%.*]] = bitcast <32 x i32> [[TMP96]] to <128 x i8> +; CHECK-NEXT: [[TMP98:%.*]] = bitcast <128 x i8> [[TMP35]] to <32 x i32> +; CHECK-NEXT: [[TMP99:%.*]] = bitcast <128 x i8> [[TMP32]] to <32 x i32> +; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP98]], <32 x i32> [[TMP99]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP101:%.*]] = bitcast <32 x i32> [[TMP100]] to <128 x i8> +; CHECK-NEXT: [[TMP102:%.*]] = bitcast <128 x i8> [[TMP38]] to <32 x i32> +; CHECK-NEXT: [[TMP103:%.*]] = bitcast <128 x i8> [[TMP35]] to <32 x i32> +; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP102]], <32 x i32> [[TMP103]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP105:%.*]] = bitcast <32 x i32> [[TMP104]] to <128 x i8> +; CHECK-NEXT: [[TMP106:%.*]] = bitcast <128 x i8> [[TMP41]] to <32 x i32> +; CHECK-NEXT: [[TMP107:%.*]] = bitcast <128 x i8> [[TMP38]] to <32 x i32> +; CHECK-NEXT: [[TMP108:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP106]], <32 x i32> [[TMP107]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP109:%.*]] = bitcast <32 x i32> [[TMP108]] to <128 x i8> +; CHECK-NEXT: [[TMP110:%.*]] = bitcast <128 x i8> [[TMP44]] to <32 x i32> +; CHECK-NEXT: [[TMP111:%.*]] = bitcast <128 x i8> [[TMP41]] to <32 x i32> +; CHECK-NEXT: [[TMP112:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP110]], <32 x i32> [[TMP111]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP113:%.*]] = bitcast <32 x i32> [[TMP112]] to <128 x i8> +; CHECK-NEXT: [[TMP114:%.*]] = bitcast <128 x i8> [[TMP47]] to <32 x i32> +; CHECK-NEXT: [[TMP115:%.*]] = bitcast <128 x i8> [[TMP44]] to <32 x i32> +; CHECK-NEXT: [[TMP116:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP114]], <32 x i32> [[TMP115]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP117:%.*]] = bitcast <32 x i32> [[TMP116]] to <128 x i8> +; CHECK-NEXT: 
[[TMP118:%.*]] = bitcast <128 x i8> [[TMP50]] to <32 x i32> +; CHECK-NEXT: [[TMP119:%.*]] = bitcast <128 x i8> [[TMP47]] to <32 x i32> +; CHECK-NEXT: [[TMP120:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP118]], <32 x i32> [[TMP119]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP121:%.*]] = bitcast <32 x i32> [[TMP120]] to <128 x i8> +; CHECK-NEXT: [[TMP122:%.*]] = bitcast <128 x i8> [[TMP53]] to <32 x i32> +; CHECK-NEXT: [[TMP123:%.*]] = bitcast <128 x i8> [[TMP50]] to <32 x i32> +; CHECK-NEXT: [[TMP124:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP122]], <32 x i32> [[TMP123]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP125:%.*]] = bitcast <32 x i32> [[TMP124]] to <128 x i8> +; CHECK-NEXT: [[TMP126:%.*]] = bitcast <128 x i8> [[TMP56]] to <32 x i32> +; CHECK-NEXT: [[TMP127:%.*]] = bitcast <128 x i8> [[TMP53]] to <32 x i32> +; CHECK-NEXT: [[TMP128:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP126]], <32 x i32> [[TMP127]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP129:%.*]] = bitcast <32 x i32> [[TMP128]] to <128 x i8> +; CHECK-NEXT: [[TMP130:%.*]] = bitcast <128 x i8> [[TMP59]] to <32 x i32> +; CHECK-NEXT: [[TMP131:%.*]] = bitcast <128 x i8> [[TMP56]] to <32 x i32> +; CHECK-NEXT: [[TMP132:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP130]], <32 x i32> [[TMP131]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP133:%.*]] = bitcast <32 x i32> [[TMP132]] to <128 x i8> +; CHECK-NEXT: [[TMP134:%.*]] = bitcast <128 x i8> [[TMP62]] to <32 x i32> +; CHECK-NEXT: [[TMP135:%.*]] = bitcast <128 x i8> [[TMP59]] to <32 x i32> +; CHECK-NEXT: [[TMP136:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP134]], <32 x i32> [[TMP135]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP137:%.*]] = bitcast <32 x i32> [[TMP136]] to <128 x i8> +; CHECK-NEXT: [[TMP138:%.*]] = bitcast <128 x i8> [[TMP65]] to <32 x i32> +; CHECK-NEXT: [[TMP139:%.*]] = bitcast <128 x i8> [[TMP62]] to <32 x i32> +; CHECK-NEXT: [[TMP140:%.*]] = call 
<32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP138]], <32 x i32> [[TMP139]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP141:%.*]] = bitcast <32 x i32> [[TMP140]] to <128 x i8> +; CHECK-NEXT: [[TMP142:%.*]] = bitcast <128 x i8> [[TMP68]] to <32 x i32> +; CHECK-NEXT: [[TMP143:%.*]] = bitcast <128 x i8> [[TMP65]] to <32 x i32> +; CHECK-NEXT: [[TMP144:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP142]], <32 x i32> [[TMP143]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP145:%.*]] = bitcast <32 x i32> [[TMP144]] to <128 x i8> +; CHECK-NEXT: [[TMP146:%.*]] = bitcast <128 x i8> [[TMP71]] to <32 x i32> +; CHECK-NEXT: [[TMP147:%.*]] = bitcast <128 x i8> [[TMP68]] to <32 x i32> +; CHECK-NEXT: [[TMP148:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP146]], <32 x i32> [[TMP147]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP149:%.*]] = bitcast <32 x i32> [[TMP148]] to <128 x i8> +; CHECK-NEXT: [[TMP150:%.*]] = bitcast <128 x i8> [[TMP74]] to <32 x i32> +; CHECK-NEXT: [[TMP151:%.*]] = bitcast <128 x i8> [[TMP71]] to <32 x i32> +; CHECK-NEXT: [[TMP152:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP150]], <32 x i32> [[TMP151]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP153:%.*]] = bitcast <32 x i32> [[TMP152]] to <128 x i8> +; CHECK-NEXT: [[TMP154:%.*]] = bitcast <128 x i8> [[TMP77]] to <32 x i32> +; CHECK-NEXT: [[TMP155:%.*]] = bitcast <128 x i8> [[TMP74]] to <32 x i32> +; CHECK-NEXT: [[TMP156:%.*]] = call <32 x i32> @llvm.hexagon.V6.valignb.128B(<32 x i32> [[TMP154]], <32 x i32> [[TMP155]], i32 [[TMP13]]) +; CHECK-NEXT: [[TMP157:%.*]] = bitcast <32 x i32> [[TMP156]] to <128 x i8> +; CHECK-NEXT: [[TMP158:%.*]] = shufflevector <128 x i8> [[TMP81]], <128 x i8> undef, <256 x i32> +; CHECK-NEXT: [[TMP159:%.*]] = shufflevector <256 x i8> undef, <256 x i8> [[TMP158]], <256 x i32> +; CHECK-NEXT: [[TMP160:%.*]] = shufflevector <256 x i8> [[TMP159]], <256 x i8> [[TMP159]], <19 x i32> +; CHECK-NEXT: [[TMP161:%.*]] = select <19 x i1> , <19 x i8> 
[[TMP160]], <19 x i8> undef +; CHECK-NEXT: [[TMP162:%.*]] = shufflevector <128 x i8> [[TMP157]], <128 x i8> undef, <256 x i32> +; CHECK-NEXT: [[TMP163:%.*]] = shufflevector <256 x i8> undef, <256 x i8> [[TMP162]], <256 x i32> +; CHECK-NEXT: [[TMP164:%.*]] = shufflevector <256 x i8> [[TMP163]], <256 x i8> [[TMP163]], <19 x i32> +; CHECK-NEXT: [[TMP165:%.*]] = select <19 x i1> , <19 x i8> [[TMP164]], <19 x i8> undef +; CHECK-NEXT: [[TMP166:%.*]] = mul <19 x i8> [[TMP4]], [[TMP161]] +; CHECK-NEXT: [[TMP167:%.*]] = add <19 x i8> [[TMP166]], zeroinitializer +; CHECK-NEXT: [[TMP168:%.*]] = add <19 x i8> zeroinitializer, [[TMP167]] +; CHECK-NEXT: [[TMP169:%.*]] = add <19 x i8> zeroinitializer, [[TMP168]] +; CHECK-NEXT: [[TMP170:%.*]] = add <19 x i8> zeroinitializer, [[TMP169]] +; CHECK-NEXT: [[TMP171:%.*]] = add <19 x i8> zeroinitializer, [[TMP170]] +; CHECK-NEXT: [[TMP172:%.*]] = add <19 x i8> zeroinitializer, [[TMP171]] +; CHECK-NEXT: [[TMP173:%.*]] = add <19 x i8> zeroinitializer, [[TMP172]] +; CHECK-NEXT: [[TMP174:%.*]] = add <19 x i8> zeroinitializer, [[TMP173]] +; CHECK-NEXT: [[TMP175:%.*]] = add <19 x i8> zeroinitializer, [[TMP174]] +; CHECK-NEXT: [[TMP176:%.*]] = add <19 x i8> zeroinitializer, [[TMP175]] +; CHECK-NEXT: [[TMP177:%.*]] = add <19 x i8> zeroinitializer, [[TMP176]] +; CHECK-NEXT: [[TMP178:%.*]] = load i8, i8* undef, align 1 +; CHECK-NEXT: [[TMP179:%.*]] = insertelement <19 x i8> undef, i8 [[TMP178]], i32 0 +; CHECK-NEXT: [[TMP180:%.*]] = shufflevector <19 x i8> [[TMP179]], <19 x i8> undef, <19 x i32> zeroinitializer +; CHECK-NEXT: [[TMP181:%.*]] = mul <19 x i8> [[TMP180]], [[TMP165]] +; CHECK-NEXT: [[TMP182:%.*]] = add <19 x i8> [[TMP181]], [[TMP177]] +; CHECK-NEXT: [[TMP183:%.*]] = add <19 x i8> zeroinitializer, [[TMP182]] +; CHECK-NEXT: [[TMP184:%.*]] = add <19 x i8> zeroinitializer, [[TMP183]] +; CHECK-NEXT: [[TMP185:%.*]] = add <19 x i8> zeroinitializer, [[TMP184]] +; CHECK-NEXT: [[TMP186:%.*]] = bitcast i8* [[TMP0]] to <19 x i8>* +; CHECK-NEXT: 
store <19 x i8> [[TMP185]], <19 x i8>* [[TMP186]], align 1 +; CHECK-NEXT: ret void +; +entry: + %1 = add nuw nsw i32 0, 3040 + %2 = load i8, i8* undef, align 1 + %3 = insertelement <19 x i8> undef, i8 %2, i32 0 + %4 = shufflevector <19 x i8> %3, <19 x i8> undef, <19 x i32> zeroinitializer + %5 = getelementptr inbounds i8, i8* %0, i32 %1 + %6 = bitcast i8* %5 to <19 x i8>* + %7 = load <19 x i8>, <19 x i8>* %6, align 1 + %8 = mul <19 x i8> %4, %7 + %9 = add <19 x i8> %8, zeroinitializer + %10 = add <19 x i8> zeroinitializer, %9 + %11 = add <19 x i8> zeroinitializer, %10 + %12 = add <19 x i8> zeroinitializer, %11 + %13 = add <19 x i8> zeroinitializer, %12 + %14 = add <19 x i8> zeroinitializer, %13 + %15 = add <19 x i8> zeroinitializer, %14 + %16 = add <19 x i8> zeroinitializer, %15 + %17 = add <19 x i8> zeroinitializer, %16 + %18 = add <19 x i8> zeroinitializer, %17 + %19 = add <19 x i8> zeroinitializer, %18 + %20 = load i8, i8* undef, align 1 + %21 = insertelement <19 x i8> undef, i8 %20, i32 0 + %22 = shufflevector <19 x i8> %21, <19 x i8> undef, <19 x i32> zeroinitializer + %23 = add nuw nsw i32 0, 5472 + %24 = getelementptr inbounds i8, i8* %0, i32 %23 + %25 = bitcast i8* %24 to <19 x i8>* + %26 = load <19 x i8>, <19 x i8>* %25, align 1 + %27 = mul <19 x i8> %22, %26 + %28 = add <19 x i8> %27, %19 + %29 = add <19 x i8> zeroinitializer, %28 + %30 = add <19 x i8> zeroinitializer, %29 + %31 = add <19 x i8> zeroinitializer, %30 + %32 = bitcast i8* %0 to <19 x i8>* + store <19 x i8> %31, <19 x i8>* %32, align 1 + ret void +} + +attributes #0 = { "target-features"="+hvxv66,+hvx-length128b" }