diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1171,8 +1171,8 @@ /// This behaves as if the value were coerced through memory, so on big-endian /// targets the high bits are preserved in a truncation, while little-endian /// targets preserve the low bits. -static llvm::Value *CoerceIntOrPtrToIntOrPtr(llvm::Value *Val, - llvm::Type *Ty, +static llvm::Value *CoerceIntOrPtrToIntOrPtr(llvm::Value *Val, llvm::Type *Ty, + bool UndefUnusedBits, CodeGenFunction &CGF) { if (Val->getType() == Ty) return Val; @@ -1206,8 +1206,26 @@ Val = CGF.Builder.CreateShl(Val, DstSize - SrcSize, "coerce.highbits"); } } else { - // Little-endian targets preserve the low bits. No shifts required. - Val = CGF.Builder.CreateIntCast(Val, DestIntTy, false, "coerce.val.ii"); + llvm::IntegerType *OrigType = cast<llvm::IntegerType>(Val->getType()); + unsigned OrigWidth = OrigType->getBitWidth(); + unsigned DestWidth = cast<llvm::IntegerType>(DestIntTy)->getBitWidth(); + if (UndefUnusedBits && DestWidth > OrigWidth && + DestWidth % OrigWidth == 0) { + // Insert the value in an undef vector, and then bitcast the vector to + // the destination type. Unused vector elements and the corresponding + // bits of the destination value can be treated as undef by + // optimizations. + llvm::VectorType *VecType = + llvm::VectorType::get(OrigType, DestWidth / OrigWidth, + /*Scalable=*/false); + llvm::Value *Vec = CGF.Builder.CreateInsertElement( + llvm::UndefValue::get(VecType), Val, CGF.Builder.getInt8(0), + "coerce.val.vec"); + Val = CGF.Builder.CreateBitCast(Vec, DestIntTy, "coerce.val.vec.ii"); + } else { + // Little-endian targets preserve the low bits. No shifts required. 
+ Val = CGF.Builder.CreateIntCast(Val, DestIntTy, false, "coerce.val.ii"); + } } } @@ -1216,8 +1234,6 @@ return Val; } - - /// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as /// a pointer to an object of type \arg Ty, known to be aligned to /// \arg SrcAlign bytes. @@ -1226,6 +1242,7 @@ /// destination type; in this situation the values of bits which not /// present in the src are undefined. static llvm::Value *CreateCoercedLoad(Address Src, llvm::Type *Ty, + bool UndefUnusedBits, CodeGenFunction &CGF) { llvm::Type *SrcTy = Src.getElementType(); @@ -1248,7 +1265,7 @@ if ((isa<llvm::IntegerType>(Ty) || isa<llvm::PointerType>(Ty)) && (isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy))) { llvm::Value *Load = CGF.Builder.CreateLoad(Src); - return CoerceIntOrPtrToIntOrPtr(Load, Ty, CGF); + return CoerceIntOrPtrToIntOrPtr(Load, Ty, UndefUnusedBits, CGF); } // If load is legal, just bitcast the src pointer. @@ -1314,9 +1331,8 @@ /// /// This safely handles the case when the src type is larger than the /// destination type; the upper bits of the src will be lost. -static void CreateCoercedStore(llvm::Value *Src, - Address Dst, - bool DstIsVolatile, +static void CreateCoercedStore(llvm::Value *Src, Address Dst, + bool DstIsVolatile, bool UndefUnusedBits, CodeGenFunction &CGF) { llvm::Type *SrcTy = Src->getType(); llvm::Type *DstTy = Dst.getElementType(); @@ -1346,7 +1362,7 @@ // extension or truncation to the desired type. if ((isa<llvm::IntegerType>(SrcTy) || isa<llvm::PointerType>(SrcTy)) && (isa<llvm::IntegerType>(DstTy) || isa<llvm::PointerType>(DstTy))) { - Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, CGF); + Src = CoerceIntOrPtrToIntOrPtr(Src, DstTy, UndefUnusedBits, CGF); CGF.Builder.CreateStore(Src, Dst, DstIsVolatile); return; } @@ -2873,7 +2889,8 @@ assert(NumIRArgs == 1); auto AI = Fn->getArg(FirstIRArg); AI->setName(Arg->getName() + ".coerce"); - CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, *this); + CreateCoercedStore(AI, Ptr, /*DstIsVolatile=*/false, + /*UndefUnusedBits=*/false, *this); } // Match to what EmitParmDecl is expecting for this type. 
@@ -3458,7 +3475,8 @@ // If the value is offset in memory, apply the offset now. Address V = emitAddressAtOffset(*this, ReturnValue, RetAI); - RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), *this); + RV = CreateCoercedLoad(V, RetAI.getCoerceToType(), + /*UndefUnusedBits=*/true, *this); } // In ARC, end functions that return a retainable type with a call @@ -4928,8 +4946,8 @@ } else { // In the simple case, just pass the coerced loaded value. assert(NumIRArgs == 1); - llvm::Value *Load = - CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this); + llvm::Value *Load = CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), + /*UndefUnusedBits=*/false, *this); if (CallInfo.isCmseNSCall()) { // For certain parameter types, clear padding bits, as they may reveal @@ -5406,7 +5424,8 @@ // If the value is offset in memory, apply the offset now. Address StorePtr = emitAddressAtOffset(*this, DestPtr, RetAI); - CreateCoercedStore(CI, StorePtr, DestIsVolatile, *this); + CreateCoercedStore(CI, StorePtr, DestIsVolatile, /*UndefUnusedBits=*/true, + *this); return convertTempToRValue(DestPtr, RetTy, SourceLocation()); } diff --git a/clang/test/CodeGen/arm64-arguments.c b/clang/test/CodeGen/arm64-arguments.c --- a/clang/test/CodeGen/arm64-arguments.c +++ b/clang/test/CodeGen/arm64-arguments.c @@ -745,3 +745,63 @@ // CHECK: call <3 x float> (i32, ...) 
@test_hva_v3(i32 1, [4 x <4 x float>] {{.*}}) return test_hva_v3(1, *a); } + +char ret_coerce1(void) { + // CHECK-LABEL: i8 @ret_coerce1 + // CHECK: alloca i8 + // CHECK-NEXT: load i8 + // CHECK-NEXT: ret i8 +} + +short ret_coerce2(void) { + // CHECK-LABEL: i16 @ret_coerce2 + // CHECK: alloca i16 + // CHECK-NEXT: load i16 + // CHECK-NEXT: ret i16 +} + +int ret_coerce3(void) { + // CHECK-LABEL: i32 @ret_coerce3 + // CHECK: alloca i32 + // CHECK-NEXT: load i32 + // CHECK-NEXT: ret i32 +} + +struct ret_coerce_char { + char f0; +}; +struct ret_coerce_char ret_coerce4(void) { + // CHECK-LABEL: i64 @ret_coerce4 + // CHECK: %[[ALLOCA:.*]] = alloca %struct.ret_coerce_char + // CHECK: %[[GEP:.*]] = getelementptr {{.*}} %[[ALLOCA]], i32 0, i32 0 + // CHECK: %[[LOAD:.*]] = load i8, i8* %[[GEP]] + // CHECK: %[[VEC:.*]] = insertelement <8 x i8> undef, i8 %[[LOAD]], i8 0 + // CHECK: %[[CAST:.*]] = bitcast <8 x i8> %[[VEC]] to i64 + // CHECK: ret i64 %[[CAST]] +} + +struct ret_coerce_short { + short f0; +}; +struct ret_coerce_short ret_coerce5(void) { + // CHECK-LABEL: i64 @ret_coerce5 + // CHECK: %[[ALLOCA:.*]] = alloca %struct.ret_coerce_short + // CHECK: %[[GEP:.*]] = getelementptr {{.*}} %[[ALLOCA]], i32 0, i32 0 + // CHECK: %[[LOAD:.*]] = load i16, i16* %[[GEP]] + // CHECK: %[[VEC:.*]] = insertelement <4 x i16> undef, i16 %[[LOAD]], i8 0 + // CHECK: %[[CAST:.*]] = bitcast <4 x i16> %[[VEC]] to i64 + // CHECK: ret i64 %[[CAST]] +} + +struct ret_coerce_int { + int f0; +}; +struct ret_coerce_int ret_coerce6(void) { + // CHECK-LABEL: i64 @ret_coerce6 + // CHECK: %[[ALLOCA:.*]] = alloca %struct.ret_coerce_int + // CHECK: %[[GEP:.*]] = getelementptr {{.*}} %[[ALLOCA]], i32 0, i32 0 + // CHECK: %[[LOAD:.*]] = load i32, i32* %[[GEP]] + // CHECK: %[[VEC:.*]] = insertelement <2 x i32> undef, i32 %[[LOAD]], i8 0 + // CHECK: %[[CAST:.*]] = bitcast <2 x i32> %[[VEC]] to i64 + // CHECK: ret i64 %[[CAST]] +} diff --git a/clang/test/CodeGenCXX/trivial_abi.cpp 
b/clang/test/CodeGenCXX/trivial_abi.cpp --- a/clang/test/CodeGenCXX/trivial_abi.cpp +++ b/clang/test/CodeGenCXX/trivial_abi.cpp @@ -202,7 +202,8 @@ // CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_TRIVIAL:.*]], align 4 // CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_TRIVIAL]], %[[STRUCT_TRIVIAL]]* %[[RETVAL]], i32 0, i32 0 // CHECK: %[[V0:.*]] = load i32, i32* %[[COERCE_DIVE]], align 4 -// CHECK: %[[COERCE_VAL_II:.*]] = zext i32 %[[V0]] to i64 +// CHECK: %[[COERCE_VAL_VEC:.*]] = insertelement <2 x i32> undef, i32 %[[V0]], i8 0 +// CHECK: %[[COERCE_VAL_II:.*]] = bitcast <2 x i32> %[[COERCE_VAL_VEC]] to i64 // CHECK: ret i64 %[[COERCE_VAL_II]] // CHECK: }