Index: llvm/lib/Transforms/Scalar/Scalarizer.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -762,6 +762,12 @@ Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(), BCI.getName() + ".i" + Twine(I)); } else if (DstNumElems > SrcNumElems) { + // Abort and do nothing if we don't have the special case + // <M x t1> -> <N*M x t2>. + unsigned N = DstNumElems / SrcNumElems; + if (DstNumElems != SrcNumElems * N) + return false; + // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the // individual elements to the destination. unsigned FanOut = DstNumElems / SrcNumElems; @@ -781,6 +787,12 @@ Res[ResI++] = Mid[MidI]; } } else { + // Abort and do nothing if we don't have the special case + // <N*M x t1> -> <M x t2>. + unsigned N = SrcNumElems / DstNumElems; + if (SrcNumElems != DstNumElems * N) + return false; + // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2. unsigned FanIn = SrcNumElems / DstNumElems; auto *MidTy = FixedVectorType::get(SrcVT->getElementType(), FanIn); Index: llvm/test/Transforms/Scalarizer/bitcast.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Scalarizer/bitcast.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -passes=scalarizer %s -o - -S | FileCheck %s + +; REQUIRES: asserts + +; Scalarize bitcasts of vectors to the same, larger and smaller number of +; elements. +; In some cases we can indeed scalarize, but in some cases we just leave the +; bitcast be. In any case we should not crash, which used to happen for the +; 3 to 4 and 4 to 3 cases. 
+ +@g = global [12 x i8] zeroinitializer + +; Same number of elements + +define void @bitcast_3_to_3_elements() { +; CHECK-LABEL: define void @bitcast_3_to_3_elements() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC:%.*]] = load <3 x i32>, ptr @g, align 16 +; CHECK-NEXT: [[VEC_I0:%.*]] = extractelement <3 x i32> [[VEC]], i64 0 +; CHECK-NEXT: [[BC_I0:%.*]] = bitcast i32 [[VEC_I0]] to float +; CHECK-NEXT: [[VEC_I1:%.*]] = extractelement <3 x i32> [[VEC]], i64 1 +; CHECK-NEXT: [[BC_I1:%.*]] = bitcast i32 [[VEC_I1]] to float +; CHECK-NEXT: [[VEC_I2:%.*]] = extractelement <3 x i32> [[VEC]], i64 2 +; CHECK-NEXT: [[BC_I2:%.*]] = bitcast i32 [[VEC_I2]] to float +; CHECK-NEXT: ret void +; +entry: + %vec = load <3 x i32>, ptr @g + %bc = bitcast <3 x i32> %vec to <3 x float> + ret void +} + +; Larger number of elements + +define void @bitcast_2_to_4_elements() { +; CHECK-LABEL: define void @bitcast_2_to_4_elements() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC:%.*]] = load <2 x i48>, ptr @g, align 16 +; CHECK-NEXT: [[VEC_I0:%.*]] = extractelement <2 x i48> [[VEC]], i64 0 +; CHECK-NEXT: [[VEC_I0_CAST:%.*]] = bitcast i48 [[VEC_I0]] to <2 x i24> +; CHECK-NEXT: [[VEC_I0_CAST_I0:%.*]] = extractelement <2 x i24> [[VEC_I0_CAST]], i64 0 +; CHECK-NEXT: [[VEC_I0_CAST_I1:%.*]] = extractelement <2 x i24> [[VEC_I0_CAST]], i64 1 +; CHECK-NEXT: [[VEC_I1:%.*]] = extractelement <2 x i48> [[VEC]], i64 1 +; CHECK-NEXT: [[VEC_I1_CAST:%.*]] = bitcast i48 [[VEC_I1]] to <2 x i24> +; CHECK-NEXT: [[VEC_I1_CAST_I0:%.*]] = extractelement <2 x i24> [[VEC_I1_CAST]], i64 0 +; CHECK-NEXT: [[VEC_I1_CAST_I1:%.*]] = extractelement <2 x i24> [[VEC_I1_CAST]], i64 1 +; CHECK-NEXT: ret void +; +entry: + %vec = load <2 x i48>, ptr @g + %bc = bitcast <2 x i48> %vec to <4 x i24> + ret void +} + +define void @bitcast_3_to_4_elements() { +; CHECK-LABEL: define void @bitcast_3_to_4_elements() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC:%.*]] = load <3 x i32>, ptr @g, align 16 +; CHECK-NEXT: [[BC:%.*]] = bitcast <3 x 
i32> [[VEC]] to <4 x i24> +; CHECK-NEXT: ret void +; +entry: + %vec = load <3 x i32>, ptr @g + %bc = bitcast <3 x i32> %vec to <4x i24> + ret void +} + +; Smaller number of elements + +define void @bitcast_4_to_2_elements() { +; CHECK-LABEL: define void @bitcast_4_to_2_elements() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC:%.*]] = load <4 x i24>, ptr @g, align 16 +; CHECK-NEXT: [[VEC_I0:%.*]] = extractelement <4 x i24> [[VEC]], i64 0 +; CHECK-NEXT: [[BC_I0_UPTO0:%.*]] = insertelement <2 x i24> poison, i24 [[VEC_I0]], i64 0 +; CHECK-NEXT: [[VEC_I1:%.*]] = extractelement <4 x i24> [[VEC]], i64 1 +; CHECK-NEXT: [[BC_I0_UPTO1:%.*]] = insertelement <2 x i24> [[BC_I0_UPTO0]], i24 [[VEC_I1]], i64 1 +; CHECK-NEXT: [[BC_I0:%.*]] = bitcast <2 x i24> [[BC_I0_UPTO1]] to i48 +; CHECK-NEXT: [[VEC_I2:%.*]] = extractelement <4 x i24> [[VEC]], i64 2 +; CHECK-NEXT: [[BC_I1_UPTO0:%.*]] = insertelement <2 x i24> poison, i24 [[VEC_I2]], i64 0 +; CHECK-NEXT: [[VEC_I3:%.*]] = extractelement <4 x i24> [[VEC]], i64 3 +; CHECK-NEXT: [[BC_I1_UPTO1:%.*]] = insertelement <2 x i24> [[BC_I1_UPTO0]], i24 [[VEC_I3]], i64 1 +; CHECK-NEXT: [[BC_I1:%.*]] = bitcast <2 x i24> [[BC_I1_UPTO1]] to i48 +; CHECK-NEXT: ret void +; +entry: + %vec = load <4 x i24>, ptr @g + %bc = bitcast <4 x i24> %vec to <2 x i48> + ret void +} + +define void @bitcast_4_to_3_elements() { +; CHECK-LABEL: define void @bitcast_4_to_3_elements() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VEC:%.*]] = load <4 x i24>, ptr @g, align 16 +; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i24> [[VEC]] to <3 x i32> +; CHECK-NEXT: ret void +; +entry: + %vec = load <4 x i24>, ptr @g + %bc = bitcast <4 x i24> %vec to <3 x i32> + ret void +}