Skip to content

Commit 561c399

Browse files
committed Sep 2, 2019
[InstCombine] recognize bswap disguised as shufflevector
bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) to i{N*8} --> bswap (bitcast X to i{N*8})

In PR43146 (https://bugs.llvm.org/show_bug.cgi?id=43146) we have a more complicated case where SLP is making a mess of bswap. This patch won't do anything for that currently, but we need to improve bswap recognition in instcombine, SLP, and/or a standalone pass to avoid that problem.

This is limited using the data-layout so we don't try to do this transform with actual vector types. The backend does not appear to have folds to convert in either direction, so we don't want to mess up something that is actually better lowered as a shuffle.

On x86, we're trading something like this:

    vmovd %edi, %xmm0
    vpshufb LCPI0_0(%rip), %xmm0, %xmm0 ## xmm0 = xmm0[3,2,1,0,u,u,u,u,u,u,u,u,u,u,u,u]
    vmovd %xmm0, %eax

For:

    movl %edi, %eax
    bswapl %eax

Differential Revision: https://reviews.llvm.org/D66965

llvm-svn: 370659
1 parent eafede2 commit 561c399

File tree

2 files changed

+27
-8
lines changed

2 files changed

+27
-8
lines changed
 

‎llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

+16
Original file line numberDiff line numberDiff line change
@@ -2416,6 +2416,22 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
24162416
return new ShuffleVectorInst(LHS, RHS, Shuf->getOperand(2));
24172417
}
24182418
}
2419+
2420+
// A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
2421+
// a byte-swap:
2422+
// bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
2423+
// TODO: We should match the related pattern for bitreverse.
2424+
if (DestTy->isIntegerTy() &&
2425+
DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2426+
SrcTy->getScalarSizeInBits() == 8 && NumShufElts % 2 == 0 &&
2427+
Shuf->hasOneUse() && Shuf->isReverse()) {
2428+
assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2429+
assert(isa<UndefValue>(ShufOp1) && "Unexpected shuffle op");
2430+
Function *Bswap =
2431+
Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
2432+
Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2433+
return IntrinsicInst::Create(Bswap, { ScalarX });
2434+
}
24192435
}
24202436

24212437
// Handle the A->B->A cast, and there is an intervening PHI node.

‎llvm/test/Transforms/InstCombine/bswap.ll

+11-8
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ define i16 @test10(i32 %a) {
233233

234234
define i32 @shuf_4bytes(<4 x i8> %x) {
235235
; CHECK-LABEL: @shuf_4bytes(
236-
; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
237-
; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32
236+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[X:%.*]] to i32
237+
; CHECK-NEXT: [[CAST:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
238238
; CHECK-NEXT: ret i32 [[CAST]]
239239
;
240240
%bswap = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -244,9 +244,9 @@ define i32 @shuf_4bytes(<4 x i8> %x) {
244244

245245
define i32 @shuf_load_4bytes(<4 x i8>* %p) {
246246
; CHECK-LABEL: @shuf_load_4bytes(
247-
; CHECK-NEXT: [[X:%.*]] = load <4 x i8>, <4 x i8>* [[P:%.*]], align 4
248-
; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 undef, i32 0>
249-
; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32
247+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* [[P:%.*]] to i32*
248+
; CHECK-NEXT: [[X1:%.*]] = load i32, i32* [[TMP1]], align 4
249+
; CHECK-NEXT: [[CAST:%.*]] = call i32 @llvm.bswap.i32(i32 [[X1]])
250250
; CHECK-NEXT: ret i32 [[CAST]]
251251
;
252252
%x = load <4 x i8>, <4 x i8>* %p
@@ -257,9 +257,7 @@ define i32 @shuf_load_4bytes(<4 x i8>* %p) {
257257

258258
define i32 @shuf_bitcast_twice_4bytes(i32 %x) {
259259
; CHECK-LABEL: @shuf_bitcast_twice_4bytes(
260-
; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8>
261-
; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[CAST1]], <4 x i8> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
262-
; CHECK-NEXT: [[CAST2:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32
260+
; CHECK-NEXT: [[CAST2:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]])
263261
; CHECK-NEXT: ret i32 [[CAST2]]
264262
;
265263
%cast1 = bitcast i32 %x to <4 x i8>
@@ -268,6 +266,7 @@ define i32 @shuf_bitcast_twice_4bytes(i32 %x) {
268266
ret i32 %cast2
269267
}
270268

269+
; Negative test - extra use
271270
declare void @use(<4 x i8>)
272271

273272
define i32 @shuf_4bytes_extra_use(<4 x i8> %x) {
@@ -283,6 +282,8 @@ define i32 @shuf_4bytes_extra_use(<4 x i8> %x) {
283282
ret i32 %cast
284283
}
285284

285+
; Negative test - scalar type is not in the data layout
286+
286287
define i128 @shuf_16bytes(<16 x i8> %x) {
287288
; CHECK-LABEL: @shuf_16bytes(
288289
; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -294,6 +295,8 @@ define i128 @shuf_16bytes(<16 x i8> %x) {
294295
ret i128 %cast
295296
}
296297

298+
; Negative test - don't touch widening shuffles (for now)
299+
297300
define i32 @shuf_2bytes_widening(<2 x i8> %x) {
298301
; CHECK-LABEL: @shuf_2bytes_widening(
299302
; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>

0 commit comments

Comments
 (0)
Please sign in to comment.