Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -436,11 +436,22 @@ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) { return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } else if (ValueVT.bitsLT(PartEVT)) { - // Bitcast Val back the original type and extract the corresponding + uint64_t ScalarSize = ValueVT.getScalarSizeInBits(); + + // Val may be wider than the result vector type. + if (PartEVT.getSizeInBits() % ScalarSize != 0) { + uint64_t ValueSize = ValueVT.getFixedSizeInBits(); + EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); + // Drop the extra bits. + Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val); + return DAG.getBitcast(ValueVT, Val); + } + + // Bitcast the scalar Val to a vector and extract the corresponding // vector we want. - unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits(); - EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), - ValueVT.getVectorElementType(), Elts); + unsigned Elts = PartEVT.getSizeInBits() / ScalarSize; + EVT WiderVecType = EVT::getVectorVT( + *DAG.getContext(), ValueVT.getVectorElementType(), Elts); Val = DAG.getBitcast(WiderVecType, Val); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getVectorIdxConstant(0, DL)); @@ -714,13 +725,25 @@ EVT BuiltVectorTy = EVT::getVectorVT( *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt.getValue()); - if (ValueVT != BuiltVectorTy) { - if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) - Val = Widened; + if (ValueVT == BuiltVectorTy) { + // Nothing to do. + } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) { + // Bitconvert vector->vector case. 
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); + } else if (SDValue Widened = + widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { + Val = Widened; + } else if (BuiltVectorTy.getVectorElementType().bitsGE( + ValueVT.getVectorElementType()) && + BuiltVectorTy.getVectorElementCount() == + ValueVT.getVectorElementCount()) { + // Promoted vector extract + Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy); } + assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type"); + // Split the vector into intermediate operands. SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { Index: llvm/lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- llvm/lib/Target/Mips/MipsISelLowering.cpp +++ llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -122,9 +122,7 @@ CallingConv::ID CC, EVT VT) const { if (VT.isVector()) - return std::max(((unsigned)VT.getSizeInBits() / - (Subtarget.isABI_O32() ? 32 : 64)), - 1U); + return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64); return MipsTargetLowering::getNumRegisters(Context, VT); } @@ -134,10 +132,10 @@ // Break down vector types to either 2 i64s or 4 i32s. RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT); IntermediateVT = RegisterVT; - NumIntermediates = VT.getFixedSizeInBits() < RegisterVT.getFixedSizeInBits() - ? VT.getVectorNumElements() - : VT.getSizeInBits() / RegisterVT.getSizeInBits(); - + NumIntermediates = + VT.getFixedSizeInBits() < RegisterVT.getFixedSizeInBits() + ?
VT.getVectorNumElements() + : divideCeil(VT.getSizeInBits(), RegisterVT.getSizeInBits()); return NumIntermediates; } Index: llvm/test/CodeGen/Mips/cconv/vector.ll =================================================================== --- llvm/test/CodeGen/Mips/cconv/vector.ll +++ llvm/test/CodeGen/Mips/cconv/vector.ll @@ -6865,3 +6865,207 @@ %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2 ret <4 x float> %res } + +; Check that vectors of types with non-power-of-two size are correctly handled. + +define <2 x i24> @i24x2(<2 x i24> %a, <2 x i24> %b) { +; MIPS32-LABEL: i24x2: +; MIPS32: # %bb.0: # %Entry +; MIPS32-NEXT: addu $2, $4, $6 +; MIPS32-NEXT: addu $3, $5, $7 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +; +; MIPS64-LABEL: i24x2: +; MIPS64: # %bb.0: # %Entry +; MIPS64-NEXT: lui $1, 256 +; MIPS64-NEXT: daddiu $1, $1, -1 +; MIPS64-NEXT: dsll $1, $1, 24 +; MIPS64-NEXT: and $2, $5, $1 +; MIPS64-NEXT: dsrl $2, $2, 24 +; MIPS64-NEXT: sll $2, $2, 0 +; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: dsrl $1, $1, 24 +; MIPS64-NEXT: sll $1, $1, 0 +; MIPS64-NEXT: addu $1, $1, $2 +; MIPS64-NEXT: sll $2, $5, 0 +; MIPS64-NEXT: sll $3, $4, 0 +; MIPS64-NEXT: dsll $1, $1, 24 +; MIPS64-NEXT: addu $2, $3, $2 +; MIPS64-NEXT: lui $3, 255 +; MIPS64-NEXT: ori $3, $3, 65535 +; MIPS64-NEXT: and $2, $2, $3 +; MIPS64-NEXT: or $2, $2, $1 +; MIPS64-NEXT: jr $ra +; MIPS64-NEXT: nop +; +; MIPS32R5EB-LABEL: i24x2: +; MIPS32R5EB: # %bb.0: # %Entry +; MIPS32R5EB-NEXT: addiu $sp, $sp, -48 +; MIPS32R5EB-NEXT: .cfi_def_cfa_offset 48 +; MIPS32R5EB-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EB-NEXT: .cfi_offset 31, -4 +; MIPS32R5EB-NEXT: .cfi_offset 30, -8 +; MIPS32R5EB-NEXT: move $fp, $sp +; MIPS32R5EB-NEXT: .cfi_def_cfa_register 30 +; MIPS32R5EB-NEXT: addiu $1, $zero, -16 +; MIPS32R5EB-NEXT: and $sp, $sp, $1 +; MIPS32R5EB-NEXT: sw $7, 28($sp) +; MIPS32R5EB-NEXT: sw $6, 20($sp) +; MIPS32R5EB-NEXT: sw $5, 12($sp) +; 
MIPS32R5EB-NEXT: sw $4, 4($sp) +; MIPS32R5EB-NEXT: ld.d $w0, 16($sp) +; MIPS32R5EB-NEXT: ld.d $w1, 0($sp) +; MIPS32R5EB-NEXT: addv.d $w0, $w1, $w0 +; MIPS32R5EB-NEXT: shf.w $w0, $w0, 177 +; MIPS32R5EB-NEXT: copy_s.w $2, $w0[1] +; MIPS32R5EB-NEXT: copy_s.w $3, $w0[3] +; MIPS32R5EB-NEXT: move $sp, $fp +; MIPS32R5EB-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; MIPS32R5EB-NEXT: addiu $sp, $sp, 48 +; MIPS32R5EB-NEXT: jr $ra +; MIPS32R5EB-NEXT: nop +; +; MIPS64R5EB-LABEL: i24x2: +; MIPS64R5EB: # %bb.0: # %Entry +; MIPS64R5EB-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EB-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EB-NEXT: dsrl $1, $5, 16 +; MIPS64R5EB-NEXT: sw $1, 16($sp) +; MIPS64R5EB-NEXT: sh $5, 20($sp) +; MIPS64R5EB-NEXT: dsrl $1, $4, 16 +; MIPS64R5EB-NEXT: sw $1, 24($sp) +; MIPS64R5EB-NEXT: sh $4, 28($sp) +; MIPS64R5EB-NEXT: lb $1, 19($sp) +; MIPS64R5EB-NEXT: dsll $1, $1, 8 +; MIPS64R5EB-NEXT: lbu $2, 20($sp) +; MIPS64R5EB-NEXT: or $1, $1, $2 +; MIPS64R5EB-NEXT: dsll $1, $1, 8 +; MIPS64R5EB-NEXT: lb $2, 27($sp) +; MIPS64R5EB-NEXT: dsll $2, $2, 8 +; MIPS64R5EB-NEXT: lbu $3, 28($sp) +; MIPS64R5EB-NEXT: or $2, $2, $3 +; MIPS64R5EB-NEXT: lbu $3, 21($sp) +; MIPS64R5EB-NEXT: dsll $2, $2, 8 +; MIPS64R5EB-NEXT: or $1, $3, $1 +; MIPS64R5EB-NEXT: lh $3, 16($sp) +; MIPS64R5EB-NEXT: dsll $3, $3, 8 +; MIPS64R5EB-NEXT: lbu $4, 18($sp) +; MIPS64R5EB-NEXT: or $3, $4, $3 +; MIPS64R5EB-NEXT: lbu $4, 29($sp) +; MIPS64R5EB-NEXT: insert.d $w0[0], $3 +; MIPS64R5EB-NEXT: insert.d $w0[1], $1 +; MIPS64R5EB-NEXT: or $1, $4, $2 +; MIPS64R5EB-NEXT: lh $2, 24($sp) +; MIPS64R5EB-NEXT: dsll $2, $2, 8 +; MIPS64R5EB-NEXT: lbu $3, 26($sp) +; MIPS64R5EB-NEXT: or $2, $3, $2 +; MIPS64R5EB-NEXT: insert.d $w1[0], $2 +; MIPS64R5EB-NEXT: insert.d $w1[1], $1 +; MIPS64R5EB-NEXT: addv.d $w0, $w1, $w0 +; MIPS64R5EB-NEXT: copy_s.d $1, $w0[1] +; MIPS64R5EB-NEXT: copy_s.d $2, $w0[0] +; MIPS64R5EB-NEXT: sb $2, 10($sp) +; MIPS64R5EB-NEXT: dsrl $3, $1, 16 +; 
MIPS64R5EB-NEXT: sb $3, 11($sp) +; MIPS64R5EB-NEXT: dsrl $2, $2, 8 +; MIPS64R5EB-NEXT: sh $2, 8($sp) +; MIPS64R5EB-NEXT: sb $1, 13($sp) +; MIPS64R5EB-NEXT: dsrl $1, $1, 8 +; MIPS64R5EB-NEXT: sb $1, 12($sp) +; MIPS64R5EB-NEXT: lw $1, 8($sp) +; MIPS64R5EB-NEXT: dsll $1, $1, 16 +; MIPS64R5EB-NEXT: lhu $2, 12($sp) +; MIPS64R5EB-NEXT: or $2, $2, $1 +; MIPS64R5EB-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EB-NEXT: jr $ra +; MIPS64R5EB-NEXT: nop +; +; MIPS32R5EL-LABEL: i24x2: +; MIPS32R5EL: # %bb.0: # %Entry +; MIPS32R5EL-NEXT: addiu $sp, $sp, -48 +; MIPS32R5EL-NEXT: .cfi_def_cfa_offset 48 +; MIPS32R5EL-NEXT: sw $ra, 44($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: sw $fp, 40($sp) # 4-byte Folded Spill +; MIPS32R5EL-NEXT: .cfi_offset 31, -4 +; MIPS32R5EL-NEXT: .cfi_offset 30, -8 +; MIPS32R5EL-NEXT: move $fp, $sp +; MIPS32R5EL-NEXT: .cfi_def_cfa_register 30 +; MIPS32R5EL-NEXT: addiu $1, $zero, -16 +; MIPS32R5EL-NEXT: and $sp, $sp, $1 +; MIPS32R5EL-NEXT: sw $7, 24($sp) +; MIPS32R5EL-NEXT: sw $6, 16($sp) +; MIPS32R5EL-NEXT: sw $5, 8($sp) +; MIPS32R5EL-NEXT: sw $4, 0($sp) +; MIPS32R5EL-NEXT: ld.d $w0, 16($sp) +; MIPS32R5EL-NEXT: ld.d $w1, 0($sp) +; MIPS32R5EL-NEXT: addv.d $w0, $w1, $w0 +; MIPS32R5EL-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5EL-NEXT: copy_s.w $3, $w0[2] +; MIPS32R5EL-NEXT: move $sp, $fp +; MIPS32R5EL-NEXT: lw $fp, 40($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: lw $ra, 44($sp) # 4-byte Folded Reload +; MIPS32R5EL-NEXT: addiu $sp, $sp, 48 +; MIPS32R5EL-NEXT: jr $ra +; MIPS32R5EL-NEXT: nop +; +; MIPS64R5EL-LABEL: i24x2: +; MIPS64R5EL: # %bb.0: # %Entry +; MIPS64R5EL-NEXT: daddiu $sp, $sp, -32 +; MIPS64R5EL-NEXT: .cfi_def_cfa_offset 32 +; MIPS64R5EL-NEXT: dsrl $1, $5, 32 +; MIPS64R5EL-NEXT: sh $1, 20($sp) +; MIPS64R5EL-NEXT: sw $5, 16($sp) +; MIPS64R5EL-NEXT: dsrl $1, $4, 32 +; MIPS64R5EL-NEXT: sh $1, 28($sp) +; MIPS64R5EL-NEXT: lbu $1, 20($sp) +; MIPS64R5EL-NEXT: sw $4, 24($sp) +; MIPS64R5EL-NEXT: dsll $1, $1, 8 +; MIPS64R5EL-NEXT: lbu $2, 19($sp) +; MIPS64R5EL-NEXT: 
or $1, $1, $2 +; MIPS64R5EL-NEXT: lb $2, 21($sp) +; MIPS64R5EL-NEXT: dsll $2, $2, 16 +; MIPS64R5EL-NEXT: lbu $3, 28($sp) +; MIPS64R5EL-NEXT: dsll $3, $3, 8 +; MIPS64R5EL-NEXT: lb $4, 18($sp) +; MIPS64R5EL-NEXT: lbu $5, 27($sp) +; MIPS64R5EL-NEXT: or $3, $3, $5 +; MIPS64R5EL-NEXT: or $1, $1, $2 +; MIPS64R5EL-NEXT: dsll $2, $4, 16 +; MIPS64R5EL-NEXT: lhu $4, 16($sp) +; MIPS64R5EL-NEXT: or $2, $4, $2 +; MIPS64R5EL-NEXT: lb $4, 29($sp) +; MIPS64R5EL-NEXT: dsll $4, $4, 16 +; MIPS64R5EL-NEXT: insert.d $w0[0], $2 +; MIPS64R5EL-NEXT: insert.d $w0[1], $1 +; MIPS64R5EL-NEXT: or $1, $3, $4 +; MIPS64R5EL-NEXT: lb $2, 26($sp) +; MIPS64R5EL-NEXT: dsll $2, $2, 16 +; MIPS64R5EL-NEXT: lhu $3, 24($sp) +; MIPS64R5EL-NEXT: or $2, $3, $2 +; MIPS64R5EL-NEXT: insert.d $w1[0], $2 +; MIPS64R5EL-NEXT: insert.d $w1[1], $1 +; MIPS64R5EL-NEXT: addv.d $w0, $w1, $w0 +; MIPS64R5EL-NEXT: copy_s.d $1, $w0[0] +; MIPS64R5EL-NEXT: copy_s.d $2, $w0[1] +; MIPS64R5EL-NEXT: dsrl $3, $2, 8 +; MIPS64R5EL-NEXT: sb $3, 12($sp) +; MIPS64R5EL-NEXT: dsrl $3, $2, 16 +; MIPS64R5EL-NEXT: sb $3, 13($sp) +; MIPS64R5EL-NEXT: sb $2, 11($sp) +; MIPS64R5EL-NEXT: sh $1, 8($sp) +; MIPS64R5EL-NEXT: dsrl $1, $1, 16 +; MIPS64R5EL-NEXT: sb $1, 10($sp) +; MIPS64R5EL-NEXT: lh $1, 12($sp) +; MIPS64R5EL-NEXT: dsll $1, $1, 32 +; MIPS64R5EL-NEXT: lwu $2, 8($sp) +; MIPS64R5EL-NEXT: or $2, $2, $1 +; MIPS64R5EL-NEXT: daddiu $sp, $sp, 32 +; MIPS64R5EL-NEXT: jr $ra +; MIPS64R5EL-NEXT: nop +Entry: + %0 = add <2 x i24> %a, %b + ret <2 x i24> %0 +}