Skip to content

Commit 3746e11

Browse files
committed Oct 4, 2018
[InstCombine] allow bitcast to/from FP for vector insert/extract transform
This is a follow-up to rL343482 / D52439. This was a pattern that initially caused the commit to be reverted because the transform requires a bitcast as shown here. llvm-svn: 343794
1 parent a4c17dd commit 3746e11

File tree

3 files changed

+53
-17
lines changed

3 files changed

+53
-17
lines changed
 

‎llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp

+31-4
Original file line number | Diff line number | Diff line change
@@ -189,9 +189,7 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
189189

190190
// If the source elements are wider than the destination, try to shift and
191191
// truncate a subset of scalar bits of an insert op.
192-
// TODO: This is limited to integer types, but we could bitcast to/from FP.
193-
if (NumSrcElts < NumElts && SrcTy->getScalarType()->isIntegerTy() &&
194-
DestTy->getScalarType()->isIntegerTy()) {
192+
if (NumSrcElts < NumElts) {
195193
Value *Scalar;
196194
uint64_t InsIndexC;
197195
if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
@@ -220,13 +218,42 @@ static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
220218
unsigned Chunk = ExtIndexC % NarrowingRatio;
221219
if (IsBigEndian)
222220
Chunk = NarrowingRatio - 1 - Chunk;
223-
unsigned ShAmt = Chunk * DestTy->getPrimitiveSizeInBits();
221+
222+
// Bail out if this is an FP vector to FP vector sequence. That would take
223+
// more instructions than we started with unless there is no shift, and it
224+
// may not be handled as well in the backend.
225+
bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
226+
bool NeedDestBitcast = DestTy->isFloatingPointTy();
227+
if (NeedSrcBitcast && NeedDestBitcast)
228+
return nullptr;
229+
230+
unsigned SrcWidth = SrcTy->getScalarSizeInBits();
231+
unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
232+
unsigned ShAmt = Chunk * DestWidth;
233+
234+
// TODO: This limitation is more strict than necessary. We could sum the
235+
// number of new instructions and subtract the number eliminated to know if
236+
// we can proceed.
237+
if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
238+
if (NeedSrcBitcast || NeedDestBitcast)
239+
return nullptr;
240+
241+
if (NeedSrcBitcast) {
242+
Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
243+
Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
244+
}
245+
224246
if (ShAmt) {
225247
// Bail out if we could end with more instructions than we started with.
226248
if (!Ext.getVectorOperand()->hasOneUse())
227249
return nullptr;
228250
Scalar = Builder.CreateLShr(Scalar, ShAmt);
229251
}
252+
253+
if (NeedDestBitcast) {
254+
Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
255+
return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
256+
}
230257
return new TruncInst(Scalar, DestTy);
231258
}
232259

‎llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-demanded-vector-elts.ll

+2-3
Original file line number | Diff line number | Diff line change
@@ -299,9 +299,8 @@ define i32 @extract0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
299299

300300
; CHECK-LABEL: @extract_lo16_0_bitcast_buffer_load_format_v4f32(
301301
; CHECK-NEXT: %tmp = call float @llvm.amdgcn.buffer.load.format.f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false)
302-
; CHECK-NEXT: %1 = insertelement <4 x float> undef, float %tmp, i64 0
303-
; CHECK-NEXT: %tmp1 = bitcast <4 x float> %1 to <8 x i16>
304-
; CHECK-NEXT: %tmp2 = extractelement <8 x i16> %tmp1, i32 0
302+
; CHECK-NEXT: %1 = bitcast float %tmp to i32
303+
; CHECK-NEXT: %tmp2 = trunc i32 %1 to i16
305304
; CHECK-NEXT: ret i16 %tmp2
306305
define i16 @extract_lo16_0_bitcast_buffer_load_format_v4f32(i32 %arg) #0 {
307306
%tmp = call <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> undef, i32 %arg, i32 16, i1 false, i1 false) #3

‎llvm/test/Transforms/InstCombine/extractelement.ll

+20-10
Original file line number | Diff line number | Diff line change
@@ -164,11 +164,16 @@ define i8 @bitcasted_inselt_wide_source_uses(i32 %x) {
164164
}
165165

166166
define float @bitcasted_inselt_to_FP(i64 %x) {
167-
; ANY-LABEL: @bitcasted_inselt_to_FP(
168-
; ANY-NEXT: [[I:%.*]] = insertelement <2 x i64> undef, i64 [[X:%.*]], i32 0
169-
; ANY-NEXT: [[B:%.*]] = bitcast <2 x i64> [[I]] to <4 x float>
170-
; ANY-NEXT: [[R:%.*]] = extractelement <4 x float> [[B]], i32 1
171-
; ANY-NEXT: ret float [[R]]
167+
; LE-LABEL: @bitcasted_inselt_to_FP(
168+
; LE-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 32
169+
; LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
170+
; LE-NEXT: [[R:%.*]] = bitcast i32 [[TMP2]] to float
171+
; LE-NEXT: ret float [[R]]
172+
;
173+
; BE-LABEL: @bitcasted_inselt_to_FP(
174+
; BE-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
175+
; BE-NEXT: [[R:%.*]] = bitcast i32 [[TMP1]] to float
176+
; BE-NEXT: ret float [[R]]
172177
;
173178
%i = insertelement <2 x i64> undef, i64 %x, i32 0
174179
%b = bitcast <2 x i64> %i to <4 x float>
@@ -210,11 +215,16 @@ define float @bitcasted_inselt_to_FP_uses2(i128 %x) {
210215
}
211216

212217
define i32 @bitcasted_inselt_from_FP(double %x) {
213-
; ANY-LABEL: @bitcasted_inselt_from_FP(
214-
; ANY-NEXT: [[I:%.*]] = insertelement <2 x double> undef, double [[X:%.*]], i32 0
215-
; ANY-NEXT: [[B:%.*]] = bitcast <2 x double> [[I]] to <4 x i32>
216-
; ANY-NEXT: [[R:%.*]] = extractelement <4 x i32> [[B]], i32 1
217-
; ANY-NEXT: ret i32 [[R]]
218+
; LE-LABEL: @bitcasted_inselt_from_FP(
219+
; LE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
220+
; LE-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32
221+
; LE-NEXT: [[R:%.*]] = trunc i64 [[TMP2]] to i32
222+
; LE-NEXT: ret i32 [[R]]
223+
;
224+
; BE-LABEL: @bitcasted_inselt_from_FP(
225+
; BE-NEXT: [[TMP1:%.*]] = bitcast double [[X:%.*]] to i64
226+
; BE-NEXT: [[R:%.*]] = trunc i64 [[TMP1]] to i32
227+
; BE-NEXT: ret i32 [[R]]
218228
;
219229
%i = insertelement <2 x double> undef, double %x, i32 0
220230
%b = bitcast <2 x double> %i to <4 x i32>

0 commit comments

Comments
 (0)
Please sign in to comment.