Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -2394,25 +2394,42 @@ if (ShuffleVectorInst *SVI = dyn_cast(Src)) { // Okay, we have (bitcast (shuffle ..)). Check to see if this is // a bitcast to a vector with the same # elts. + Value *ShufOp0 = SVI->getOperand(0); + Value *ShufOp1 = SVI->getOperand(1); if (SVI->hasOneUse() && DestTy->isVectorTy() && DestTy->getVectorNumElements() == SVI->getType()->getNumElements() && SVI->getType()->getNumElements() == - SVI->getOperand(0)->getType()->getVectorNumElements()) { + ShufOp0->getType()->getVectorNumElements()) { BitCastInst *Tmp; // If either of the operands is a cast from CI.getType(), then // evaluating the shuffle in the casted destination's type will allow // us to eliminate at least one cast. - if (((Tmp = dyn_cast(SVI->getOperand(0))) && + if (((Tmp = dyn_cast(ShufOp0)) && Tmp->getOperand(0)->getType() == DestTy) || - ((Tmp = dyn_cast(SVI->getOperand(1))) && + ((Tmp = dyn_cast(ShufOp1)) && Tmp->getOperand(0)->getType() == DestTy)) { - Value *LHS = Builder.CreateBitCast(SVI->getOperand(0), DestTy); - Value *RHS = Builder.CreateBitCast(SVI->getOperand(1), DestTy); + Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy); + Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy); // Return a new shuffle vector. Use the same element ID's, as we // know the vector types match #elts. return new ShuffleVectorInst(LHS, RHS, SVI->getOperand(2)); } } + + // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as + // a byte-swap: + // bitcast (shuf X, undef, ) --> bswap (bitcast X) + if (SVI->hasOneUse() && SVI->isReverse() && + (SrcTy->getVectorNumElements() % 2) == 0 && + SrcTy->getScalarSizeInBits() == 8 && DestTy->isIntegerTy() && + DL.isLegalInteger(DestTy->getScalarSizeInBits())) { + assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask"); + assert(isa(ShufOp1) && "Unexpected shuffle op"); + Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, + DestTy); + Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy); + return IntrinsicInst::Create(F, { ScalarX }); + } } // Handle the A->B->A cast, and there is an intervening PHI node. Index: llvm/test/Transforms/InstCombine/bswap.ll =================================================================== --- llvm/test/Transforms/InstCombine/bswap.ll +++ llvm/test/Transforms/InstCombine/bswap.ll @@ -233,8 +233,8 @@ define i32 @shuf_4bytes(<4 x i8> %x) { ; CHECK-LABEL: @shuf_4bytes( -; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> undef, <4 x i32> -; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[X:%.*]] to i32 +; CHECK-NEXT: [[CAST:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]]) ; CHECK-NEXT: ret i32 [[CAST]] ; %bswap = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> @@ -244,9 +244,9 @@ define i32 @shuf_load_4bytes(<4 x i8>* %p) { ; CHECK-LABEL: @shuf_load_4bytes( -; CHECK-NEXT: [[X:%.*]] = load <4 x i8>, <4 x i8>* [[P:%.*]], align 4 -; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> undef, <4 x i32> -; CHECK-NEXT: [[CAST:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8>* [[P:%.*]] to i32* +; CHECK-NEXT: [[X1:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[CAST:%.*]] = call i32 @llvm.bswap.i32(i32 [[X1]]) ; CHECK-NEXT: ret i32 [[CAST]] ; %x = load <4 x i8>, <4 x i8>* %p @@ -257,9 +257,7 @@ define i32 @shuf_bitcast_twice_4bytes(i32 %x) { ; CHECK-LABEL: @shuf_bitcast_twice_4bytes( -; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8> -; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <4 x i8> [[CAST1]], <4 x i8> undef, <4 x i32> -; CHECK-NEXT: [[CAST2:%.*]] = bitcast <4 x i8> [[BSWAP]] to i32 +; CHECK-NEXT: [[CAST2:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) ; CHECK-NEXT: ret i32 [[CAST2]] ; %cast1 = bitcast i32 %x to <4 x i8> @@ -268,6 +266,7 @@ ret i32 %cast2 } +; Negative test - extra use declare void @use(<4 x i8>) define i32 @shuf_4bytes_extra_use(<4 x i8> %x) { @@ -283,6 +282,8 @@ ret i32 %cast } +; Negative test - scalar type is not in the data layout + define i128 @shuf_load_16bytes(<16 x i8> %x) { ; CHECK-LABEL: @shuf_load_16bytes( ; CHECK-NEXT: [[BSWAP:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32>