Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2416,6 +2416,21 @@ return new ShuffleVectorInst(LHS, RHS, Shuf->getOperand(2)); } } + + // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as + // a byte-swap: + // bitcast <N x i8> (shuf X, undef, <N-1, ..., 1, 0>) --> bswap (bitcast X) + // TODO: We should match the related pattern for bitreverse. + if (Shuf->hasOneUse() && Shuf->isReverse() && (NumShufElts % 2) == 0 && + SrcTy->getScalarSizeInBits() == 8 && DestTy->isIntegerTy() && + DL.isLegalInteger(DestTy->getScalarSizeInBits())) { + assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask"); + assert(isa<UndefValue>(ShufOp1) && "Unexpected shuffle op"); + Function *Bswap = + Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy); + Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy); + return IntrinsicInst::Create(Bswap, { ScalarX }); + } } // Handle the A->B->A cast, and there is an intervening PHI node. 
Index: llvm/test/Transforms/InstCombine/bswap.ll =================================================================== --- llvm/test/Transforms/InstCombine/bswap.ll +++ llvm/test/Transforms/InstCombine/bswap.ll @@ -233,8 +233,8 @@ define i32 @shuf_4bytes(<4 x i8> %x) { ; CHECK-LABEL: @shuf_4bytes( -; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[X:%.*]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[CAST]] ; %bswap = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> @@ -244,9 +244,9 @@ define i32 @shuf_load_4bytes(<4 x i8>* %p) { ; CHECK-LABEL: @shuf_load_4bytes( -; CHECK-NEXT: [[X:%.*]] = load <4 x i8>, <4 x i8>* [[P:%.*]], align 4 -; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* [[P:%.*]] to i32* +; CHECK-NEXT: [[X1:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[CAST:%.*]] = call i32 @llvm.bswap.i32(i32 [[X1]]) ; CHECK-NEXT: ret i32 [[CAST]] ; %x = load <4 x i8>, <4 x i8>* %p @@ -257,9 +257,7 @@ define i32 @shuf_bitcast_twice_4bytes(i32 %x) { ; CHECK-LABEL: @shuf_bitcast_twice_4bytes( -; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8> -; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[CAST1]], <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[CAST2:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32 +; CHECK-NEXT: [[CAST2:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) ; CHECK-NEXT: ret i32 [[CAST2]] ; %cast1 = bitcast i32 %x to <4 x i8> @@ -268,6 +266,7 @@ ret i32 %cast2 } +; Negative test - extra use declare void @use(<4 x i8>) define i32 @shuf_4bytes_extra_use(<4 x i8> %x) { @@ -283,6 +282,8 @@ ret i32 %cast } +; Negative test - scalar type is not in the data layout + define i128 
@shuf_load_16bytes(<16 x i8> %x) { ; CHECK-LABEL: @shuf_load_16bytes( ; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32>