Index: lib/Transforms/IPO/MergeFunctions.cpp
===================================================================
--- lib/Transforms/IPO/MergeFunctions.cpp
+++ lib/Transforms/IPO/MergeFunctions.cpp
@@ -617,8 +617,8 @@
 /// See method declaration comments for more details.
 int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
-  PointerType *PTyL = dyn_cast<PointerType>(TyL);
-  PointerType *PTyR = dyn_cast<PointerType>(TyR);
+  PointerType *PTyL = dyn_cast<PointerType>(TyL->getScalarType());
+  PointerType *PTyR = dyn_cast<PointerType>(TyR->getScalarType());
 
   if (DL) {
     if (PTyL && PTyL->getAddressSpace() == 0)
       TyL = DL->getIntPtrType(TyL);
@@ -845,7 +845,7 @@
 // Determine whether two GEP operations perform the same underlying arithmetic.
 // Read method declaration comments for more details.
 int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
-                                const GEPOperator *GEPR) { 
+                                const GEPOperator *GEPR) {
   unsigned int ASL = GEPL->getPointerAddressSpace();
   unsigned int ASR = GEPR->getPointerAddressSpace();
 
@@ -863,16 +863,133 @@
     return cmpAPInts(OffsetL, OffsetR);
   }
 
-  if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
-                           (uint64_t)GEPR->getPointerOperand()->getType()))
-    return Res;
-
   if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
     return Res;
 
+  if (GEPL->getPointerOperand()->getType() ==
+      GEPR->getPointerOperand()->getType()) {
+    // If the types are identical, and all the operands are identical, then we
+    // don't need to look at offsets, but can just return that these are equal.
+    for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i)
+      if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+        return Res;
+    return 0;
+  }
+
+  if (!DL) {
+    // If the GEP types aren't identical, and we lack DataLayout, then we
+    // can't work out the sizes of GEP elements.
+    return cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
+                      (uint64_t)GEPR->getPointerOperand()->getType());
+  }
+
+  // We have DataLayout available, and can look at the actual offsets which
+  // would be generated for dynamic indices. For example,
+  //   float a[]; int b[];
+  // a[i] and b[i] will both generate the same offsets from their bases.
+
+  // Note that the pointer operand may be a vector of pointers. Take the
+  // scalar element which holds a pointer.
+  Type *EltTyL = GEPL->getPointerOperandType()->getScalarType();
+  Type *EltTyR = GEPR->getPointerOperandType()->getScalarType();
 
   for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
-    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+    Value *OpL = GEPL->getOperand(i);
+    Value *OpR = GEPR->getOperand(i);
+
+    // The bases only need to be identical operands; we don't care about
+    // their offsets.
+    if (!i) {
+      if (int Res = cmpValues(OpL, OpR))
+        return Res;
+      continue;
+    }
+
+    StructType *StTyL = dyn_cast<StructType>(EltTyL);
+    StructType *StTyR = dyn_cast<StructType>(EltTyR);
+    // TODO: Handle the case where only one side is a struct, so long as both
+    // sides have a constant at this index, and the field offsets are a match.
+    if (int Res = cmpNumbers(!!StTyL, !!StTyR))
+      return Res;
+
+    if (StTyL) {
+      unsigned IntL = cast<Constant>(OpL)->getUniqueInteger().getZExtValue();
+      unsigned IntR = cast<Constant>(OpR)->getUniqueInteger().getZExtValue();
+      if (int Res = cmpNumbers(IntL, IntR))
+        return Res;
+
+      // Make sure that both left and right get the same offset here.
+      // We don't actually care what fields we skip over, so long as we get
+      // the same offset.
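+      // A worked example of the offsets compared here (illustrative types,
+      // not from this patch; assumes the usual x86-64 struct layout):
+      //   struct A { int a; float f; };  // field 1 at byte offset 4
+      //   struct B { float f; int b; };  // field 1 at byte offset 4
+      // Indexing field 1 of A and field 1 of B gives equal offsets, so the
+      // GEPs compare equal at this index even though the struct types differ.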
+      uint64_t OffsetL = DL->getStructLayout(StTyL)->getElementOffset(IntL);
+      uint64_t OffsetR = DL->getStructLayout(StTyR)->getElementOffset(IntR);
+      if (int Res = cmpNumbers(OffsetL, OffsetR))
+        return Res;
+      EltTyL = StTyL->getElementType(IntL);
+      EltTyR = StTyR->getElementType(IntR);
+      continue;
+    }
+
+    EltTyL = cast<SequentialType>(EltTyL)->getElementType();
+    EltTyR = cast<SequentialType>(EltTyR)->getElementType();
+
+    // If this is a constant subscript, handle it quickly.
+    const Constant *ConstL = dyn_cast<Constant>(OpL);
+    const Constant *ConstR = dyn_cast<Constant>(OpR);
+
+    if (int Res = cmpNumbers(!!ConstL, !!ConstR))
       return Res;
+
+    if (ConstL) {
+      bool ZeroL = ConstL->isNullValue();
+      bool ZeroR = ConstR->isNullValue();
+      if (int Res = cmpNumbers(ZeroL, ZeroR))
+        return Res;
+      if (ZeroL) continue;
+
+      // Constant non-zero indices must be at the same offset. For example,
+      //   char c[4]; short s[4];
+      // c[2] and s[1] have the same offset.
+
+      // If we have scalars, we can just get the integer index; otherwise,
+      // for a vector we look for a splatted value.
+      const Constant *ConstIntL = nullptr;
+      const Constant *ConstIntR = nullptr;
+      if (ConstL->getType()->isIntegerTy()) {
+        ConstIntL = dyn_cast<ConstantInt>(ConstL);
+        ConstIntR = dyn_cast<ConstantInt>(ConstR);
+      } else {
+        assert(ConstL->getType()->isVectorTy() && "Expected vector type");
+        ConstIntL = dyn_cast_or_null<ConstantInt>(ConstL->getSplatValue());
+        ConstIntR = dyn_cast_or_null<ConstantInt>(ConstR->getSplatValue());
+
+        // If either of the constants isn't a splat, then we need to give up.
+        if (int Res = cmpNumbers(!!ConstIntL, !!ConstIntR))
+          return Res;
+        if (!ConstIntL || !ConstIntR) {
+          // It's possible for both values to not be splats, in which case we
+          // need a reliable (ordered) way to compare them.
+          // We know at this point that the GEPs aren't of equal types, so we
+          // can compare those.
+          return cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
+                            (uint64_t)GEPR->getPointerOperand()->getType());
+        }
+      }
+
+      unsigned IntL = cast<ConstantInt>(ConstIntL)->getZExtValue();
+      unsigned IntR = cast<ConstantInt>(ConstIntR)->getZExtValue();
+      if (int Res = cmpNumbers(DL->getTypeAllocSize(EltTyL) * IntL,
+                               DL->getTypeAllocSize(EltTyR) * IntR))
+        return Res;
+    } else {
+      // Dynamic indices. These must be the same index value and have the
+      // same underlying element size.
+      if (int Res = cmpValues(OpL, OpR))
+        return Res;
+
+      if (int Res = cmpNumbers(DL->getTypeAllocSize(EltTyL),
+                               DL->getTypeAllocSize(EltTyR)))
+        return Res;
+    }
   }
 
   return 0;
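The arithmetic the comparator relies on can be sanity-checked outside of LLVM. A minimal standalone sketch (not part of the patch; the host C types and sizes are illustrative stand-ins for getTypeAllocSize results on the x86-64 target the tests assume):

  #include <cassert>
  #include <cstdint>

  int main() {
    // char c[4]; short s[4]; -- c[2] and s[1] are both 2 bytes past the
    // base, so cmpNumbers(1 * 2, 2 * 1) reports equal byte offsets.
    std::uint64_t OffC = sizeof(char) * 2;   // element size 1, index 2
    std::uint64_t OffS = sizeof(short) * 1;  // element size 2, index 1
    assert(OffC == OffS);

    // A dynamic index i only matches when the element sizes themselves
    // match: float a[]; int b[]; -- a[i] and b[i] both scale i by 4 bytes.
    assert(sizeof(float) == sizeof(int));
    return 0;
  }

The two test files below exercise exactly these cases: gep.ll covers pairs that must merge, gep2.ll covers pairs that must not.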
Index: test/Transforms/MergeFunc/gep.ll
===================================================================
--- /dev/null
+++ test/Transforms/MergeFunc/gep.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S -mergefunc -o - %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+%vector_float = type { i64, [8 x float] }
+%vector_int = type { { i32, i32 }, [8 x i32] }
+%vector_char4 = type { { i32, i32 }, [8 x <4 x i8>] }
+%vector_short2 = type { { i32, i32 }, [8 x <2 x i16>] }
+
+declare void @keepalive(i8*)
+
+; The i'th element of the float and int vectors should be at the same offset.
+
+; CHECK-DAG: call void @{{vector_float|vector_int}}
+
+define linkonce_odr void @vector_float(%vector_float* %this, i64 %i) unnamed_addr {
+entry:
+  %offset_i = getelementptr %vector_float* %this, i64 0, i32 1, i64 %i
+  %bc_offset_i = bitcast float* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_int(%vector_int* %this, i64 %i) unnamed_addr {
+entry:
+  %offset_i = getelementptr %vector_int* %this, i64 0, i32 1, i64 %i
+  %bc_offset_i = bitcast i32* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+; The character at index 2 should be at the same offset as the short at index 1.
+
+; CHECK-DAG: call void @{{vector_char4_index2|vector_short2_index1}}
+
+define linkonce_odr void @vector_char4_index2(%vector_char4* %this, i64 %i) unnamed_addr {
+entry:
+  %offset_i = getelementptr %vector_char4* %this, i64 0, i32 1, i64 %i, i64 2
+  %bc_offset_i = bitcast i8* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_short2_index1(%vector_short2* %this, i64 %i) unnamed_addr {
+entry:
+  %offset_i = getelementptr %vector_short2* %this, i64 0, i32 1, i64 %i, i64 1
+  %bc_offset_i = bitcast i16* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+; Using vector GEPs with splatted constant indices instead of scalar indices
+; shouldn't change whether we can merge.
+
+; CHECK-DAG: call void @{{vector_char4_vec|vector_short2_vec}}
+
+%vector_char4_ptr = type <4 x %vector_char4*>
+%vector_short2_ptr = type <4 x %vector_short2*>
+
+define linkonce_odr void @vector_char4_vec(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_char4_ptr
+  %offset_i = getelementptr %vector_char4_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  %ext_i = extractelement <4 x i8*> %offset_i, i32 0
+  %bc_offset_i = bitcast i8* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_short2_vec(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_short2_ptr
+  %offset_i = getelementptr %vector_short2_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %ext_i = extractelement <4 x i16*> %offset_i, i32 0
+  %bc_offset_i = bitcast i16* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
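For contrast, gep2.ll below covers pairs that must stay distinct. A one-line sketch of why its first pair cannot merge (standalone C++, illustrative host types again): the char at index 1 is 1 byte past the element base while the short at index 1 is 2 bytes past it, so the alloc-size products the comparator checks differ.

  static_assert(sizeof(char) * 1 != sizeof(short) * 1,
                "char index 1 is at byte 1; short index 1 is at byte 2");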
Index: test/Transforms/MergeFunc/gep2.ll
===================================================================
--- /dev/null
+++ test/Transforms/MergeFunc/gep2.ll
@@ -0,0 +1,95 @@
+; RUN: opt -S -mergefunc -o - %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+%vector_char4 = type { { i32, i32 }, [8 x <4 x i8>] }
+%vector_short2 = type { { i32, i32 }, [8 x <2 x i16>] }
+
+declare void @keepalive(i8*)
+
+; The character at index 1 should not be at the same offset as the short at index 1.
+
+define linkonce_odr void @vector_char4_index1(%vector_char4* %this, i64 %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_char4_index1
+; CHECK: getelementptr
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %offset_i = getelementptr %vector_char4* %this, i64 0, i32 1, i64 %i, i64 1
+  %bc_offset_i = bitcast i8* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_short2_index1(%vector_short2* %this, i64 %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_short2_index1
+; CHECK: getelementptr
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %offset_i = getelementptr %vector_short2* %this, i64 0, i32 1, i64 %i, i64 1
+  %bc_offset_i = bitcast i16* %offset_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+; Comparing vector constant indices is only supported when the constants are
+; splats for now, so the non-splat cases below must not merge.
+
+%vector_char4_ptr = type <4 x %vector_char4*>
+%vector_short2_ptr = type <4 x %vector_short2*>
+
+define linkonce_odr void @vector_char4_vec(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_char4_vec
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: extractelement
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_char4_ptr
+  %offset_i = getelementptr %vector_char4_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %ext_i = extractelement <4 x i8*> %offset_i, i32 0
+  %bc_offset_i = bitcast i8* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+; This is the same as the above test, but with the fields of the non-splat
+; constant in a different order. It ensures we still don't merge here.
+
+define linkonce_odr void @vector_char4_vec2(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_char4_vec2
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: extractelement
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_char4_ptr
+  %offset_i = getelementptr %vector_char4_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  %ext_i = extractelement <4 x i8*> %offset_i, i32 0
+  %bc_offset_i = bitcast i8* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
+
+define linkonce_odr void @vector_short2_vec(%vector_char4_ptr %this, <4 x i64> %i) unnamed_addr {
+entry:
+; CHECK-LABEL: @vector_short2_vec
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: extractelement
+; CHECK: bitcast
+; CHECK: call void @keepalive
+; CHECK: ret void
+  %bc_ptr = bitcast %vector_char4_ptr %this to %vector_short2_ptr
+  %offset_i = getelementptr %vector_short2_ptr %bc_ptr, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i64> %i, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+  %ext_i = extractelement <4 x i16*> %offset_i, i32 0
+  %bc_offset_i = bitcast i16* %ext_i to i8*
+  call void @keepalive(i8* %bc_offset_i)
+  ret void
+}
\ No newline at end of file
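To exercise just these tests locally, the usual lit invocation should work (paths assume an in-tree LLVM checkout and built tools; adjust to your setup):

  llvm-lit test/Transforms/MergeFunc/gep.ll test/Transforms/MergeFunc/gep2.ll

which runs the RUN line in each file, i.e. opt -S -mergefunc -o - %s | FileCheck %s.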