Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -2493,7 +2493,7 @@ Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); int j = 0; - ConstantInt *C0 = ConstantInt::get(CGF.Int32Ty, 0); + ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; ++ai, ++j) { llvm::Type *ArgTy = ai->getType(); @@ -2733,7 +2733,7 @@ Ops[0] = Builder.CreateBitCast(Ops[0], Ty); LoadInst *Ld = Builder.CreateLoad(Ops[0]); Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); + llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Ops[0] = Builder.CreateInsertElement(V, Ld, CI); return EmitNeonSplat(Ops[0], CI); } @@ -3703,7 +3703,7 @@ llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); Op = Builder.CreateBitCast(Op, Int16Ty); Value *V = UndefValue::get(VTy); - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); + llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Op = Builder.CreateInsertElement(V, Op, CI); return Op; } @@ -3712,7 +3712,7 @@ llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); Op = Builder.CreateBitCast(Op, Int8Ty); Value *V = UndefValue::get(VTy); - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); + llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Op = Builder.CreateInsertElement(V, Op, CI); return Op; } @@ -3729,7 +3729,7 @@ Ops[1] = vectorWrapScalar8(Ops[1]); llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8); Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); - Constant *CI = ConstantInt::get(Int32Ty, 0); + Constant *CI = ConstantInt::get(SizeTy, 0); return Builder.CreateExtractElement(V, CI, "lane0"); } @@ -3745,7 +3745,7 @@ Ops[1] = vectorWrapScalar16(Ops[1]); llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name); - Constant *CI = 
ConstantInt::get(Int32Ty, 0); + Constant *CI = ConstantInt::get(SizeTy, 0); return Builder.CreateExtractElement(V, CI, "lane0"); } @@ -3950,8 +3950,8 @@ Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f64, so make sure it's bitcast to that. Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); - llvm::Value *Idx0 = llvm::ConstantInt::get(Int32Ty, 0); - llvm::Value *Idx1 = llvm::ConstantInt::get(Int32Ty, 1); + llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); + llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); // Pairwise addition of a v2f64 into a scalar f64. @@ -3963,8 +3963,8 @@ Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f64, so make sure it's bitcast to that. Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); - llvm::Value *Idx0 = llvm::ConstantInt::get(Int32Ty, 0); - llvm::Value *Idx1 = llvm::ConstantInt::get(Int32Ty, 1); + llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); + llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); // Pairwise addition of a v2f64 into a scalar f64. @@ -3976,8 +3976,8 @@ Value *Vec = EmitScalarExpr(E->getArg(0)); // The vector is v2f32, so make sure it's bitcast to that. Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); - llvm::Value *Idx0 = llvm::ConstantInt::get(Int32Ty, 0); - llvm::Value *Idx1 = llvm::ConstantInt::get(Int32Ty, 1); + llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); + llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); // Pairwise addition of a v2f32 into a scalar f32. 
@@ -4228,7 +4228,7 @@ llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), ProductOps, "vqdmlXl"); - Constant *CI = ConstantInt::get(Int32Ty, 0); + Constant *CI = ConstantInt::get(SizeTy, 0); Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 @@ -4325,7 +4325,7 @@ llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), ProductOps, "vqdmlXl"); - Constant *CI = ConstantInt::get(Int32Ty, 0); + Constant *CI = ConstantInt::get(SizeTy, 0); Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); Ops.pop_back(); @@ -5232,7 +5232,7 @@ Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[0] = Builder.CreateLoad(Ops[0]); - llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); + llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); return EmitNeonSplat(Ops[0], CI); } @@ -5611,7 +5611,7 @@ // extract (0, 1) unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; - llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index); + llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); // cast pointer to i64 & store Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -1331,7 +1331,7 @@ const VectorType *ExprVT = LV.getType()->getAs<VectorType>(); if (!ExprVT) { unsigned InIdx = getAccessedFieldNo(0, Elts); - llvm::Value *Elt = llvm::ConstantInt::get(Int32Ty, InIdx); + llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx); return RValue::get(Builder.CreateExtractElement(Vec, Elt)); } @@ -1590,7 +1590,7 @@ } else { // If the Src is a scalar (not a vector) it must be updating one element. 
unsigned InIdx = getAccessedFieldNo(0, Elts); - llvm::Value *Elt = llvm::ConstantInt::get(Int32Ty, InIdx); + llvm::Value *Elt = llvm::ConstantInt::get(SizeTy, InIdx); Vec = Builder.CreateInsertElement(Vec, SrcVal, Elt); } @@ -2307,7 +2307,6 @@ // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); assert(LHS.isSimple() && "Can only subscript lvalue vectors here!"); - Idx = Builder.CreateIntCast(Idx, Int32Ty, IdxSigned, "vidx"); return LValue::MakeVectorElt(LHS.getAddress(), Idx, E->getBase()->getType(), LHS.getAlignment()); } Index: lib/CodeGen/CGExprScalar.cpp =================================================================== --- lib/CodeGen/CGExprScalar.cpp +++ lib/CodeGen/CGExprScalar.cpp @@ -940,9 +940,8 @@ MTy->getNumElements()); Value* NewV = llvm::UndefValue::get(RTy); for (unsigned i = 0, e = MTy->getNumElements(); i != e; ++i) { - Value *IIndx = Builder.getInt32(i); + Value *IIndx = llvm::ConstantInt::get(CGF.SizeTy, i); Value *Indx = Builder.CreateExtractElement(Mask, IIndx, "shuf_idx"); - Indx = Builder.CreateZExt(Indx, CGF.Int32Ty, "idx_zext"); Value *VExt = Builder.CreateExtractElement(LHS, Indx, "shuf_elt"); NewV = Builder.CreateInsertElement(NewV, VExt, IIndx, "shuf_ins"); @@ -1073,8 +1072,6 @@ if (CGF.SanOpts->ArrayBounds) CGF.EmitBoundsCheck(E, E->getBase(), Idx, IdxTy, /*Accessed*/true); - bool IdxSigned = IdxTy->isSignedIntegerOrEnumerationType(); - Idx = Builder.CreateIntCast(Idx, CGF.Int32Ty, IdxSigned, "vecidxcast"); return Builder.CreateExtractElement(Base, Idx, "vecext"); } Index: test/CodeGen/builtinshufflevector2.c =================================================================== --- test/CodeGen/builtinshufflevector2.c +++ test/CodeGen/builtinshufflevector2.c @@ -6,23 +6,23 @@ // CHECK-LABEL: define void @clang_shufflevector_v_v( void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) { // CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, <i32 3, i32 3, i32 3, i32 3> -// CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], 
i32 0 -// CHECK: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[I]] +// CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 0 +// CHECK: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i{{[0-9]+}} [[I]] // // Here is where ToT Clang code generation makes a mistake. // It uses [[I]] as the insertion index instead of 0. // Similarly on the remaining insertelement. -// CHECK: [[V:%[a-zA-Z0-9._]+]] = insertelement <4 x float> undef, float [[E]], i32 0 +// CHECK: [[V:%[a-zA-Z0-9._]+]] = insertelement <4 x float> undef, float [[E]], i{{[0-9]+}} 0 -// CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i32 1 -// CHECK: [[E:%.*]] = extractelement <4 x float> [[X]], i32 [[I]] -// CHECK: [[V2:%.*]] = insertelement <4 x float> [[V]], float [[E]], i32 1 -// CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i32 2 -// CHECK: [[E:%.*]] = extractelement <4 x float> [[X]], i32 [[I]] -// CHECK: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[E]], i32 2 -// CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i32 3 -// CHECK: [[E:%.*]] = extractelement <4 x float> [[X]], i32 [[I]] -// CHECK: [[V4:%.*]] = insertelement <4 x float> [[V3]], float [[E]], i32 3 +// CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 1 +// CHECK: [[E:%.*]] = extractelement <4 x float> [[X]], i{{[0-9]+}} [[I]] +// CHECK: [[V2:%.*]] = insertelement <4 x float> [[V]], float [[E]], i{{[0-9]+}} 1 +// CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 2 +// CHECK: [[E:%.*]] = extractelement <4 x float> [[X]], i{{[0-9]+}} [[I]] +// CHECK: [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[E]], i{{[0-9]+}} 2 +// CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 3 +// CHECK: [[E:%.*]] = extractelement <4 x float> [[X]], i{{[0-9]+}} [[I]] +// CHECK: [[V4:%.*]] = insertelement <4 x float> [[V3]], float [[E]], i{{[0-9]+}} 3 // CHECK: store <4 x float> [[V4]], <4 x float>* {{%.*}}, *A = __builtin_shufflevector( x, mask ); }