diff --git a/llvm/include/llvm/Transforms/Utils/VNCoercion.h b/llvm/include/llvm/Transforms/Utils/VNCoercion.h --- a/llvm/include/llvm/Transforms/Utils/VNCoercion.h +++ b/llvm/include/llvm/Transforms/Utils/VNCoercion.h @@ -75,7 +75,8 @@ /// It inserts instructions to do so at InsertPt, and returns the extracted /// value. Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, - Instruction *InsertPt, const DataLayout &DL); + Instruction *InsertPt, const DataLayout &DL, + bool SrcValIsLoadValue = false); // This is the same as getValueForLoad, except it performs no insertion. // It only allows constant inputs. Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset, diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1022,7 +1022,8 @@ Res = CoercedLoad; combineMetadataForCSE(CoercedLoad, Load, false); } else { - Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL); + Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL, + /*SrcValIsLoadValue=*/true); // We are adding a new user for this load, for which the original // metadata may not hold. 
Additionally, the new load may have a different // size and type, so their metadata cannot be combined in any diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp --- a/llvm/lib/Transforms/Utils/VNCoercion.cpp +++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp @@ -292,7 +292,8 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset, Type *LoadTy, IRBuilderBase &Builder, - const DataLayout &DL) { + const DataLayout &DL, + bool SrcValIsLoadValue) { LLVMContext &Ctx = SrcVal->getType()->getContext(); // If two pointers are in the same address space, they have the same size, @@ -326,14 +327,18 @@ SrcVal = Builder.CreateLShr(SrcVal, ConstantInt::get(SrcVal->getType(), ShiftAmt)); - if (LoadSize != StoreSize) + if (LoadSize != StoreSize) { + if (SrcValIsLoadValue && !isGuaranteedNotToBePoison(SrcVal)) + SrcVal = Builder.CreateFreeze(SrcVal); SrcVal = Builder.CreateTruncOrBitCast(SrcVal, IntegerType::get(Ctx, LoadSize * 8)); + } return SrcVal; } Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy, - Instruction *InsertPt, const DataLayout &DL) { + Instruction *InsertPt, const DataLayout &DL, + bool SrcValIsLoadValue) { #ifndef NDEBUG unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue(); @@ -341,7 +346,8 @@ assert(Offset + LoadSize <= SrcValSize); #endif IRBuilder<> Builder(InsertPt); - SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL); + SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL, + SrcValIsLoadValue); return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL); } diff --git a/llvm/test/Transforms/GVN/PRE/atomic.ll b/llvm/test/Transforms/GVN/PRE/atomic.ll --- a/llvm/test/Transforms/GVN/PRE/atomic.ll +++ b/llvm/test/Transforms/GVN/PRE/atomic.ll @@ -428,8 +428,9 @@ ; CHECK-LABEL: define i64 @narrow ; CHECK-SAME: (ptr [[P1:%.*]]) { ; CHECK-NEXT: [[A64:%.*]] = load atomic i64, ptr [[P1]] unordered, align 4 -; CHECK-NEXT: 
[[TMP1:%.*]] = trunc i64 [[A64]] to i32 -; CHECK-NEXT: [[B64:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[A64]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[B64:%.*]] = sext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[RES:%.*]] = sub i64 [[A64]], [[B64]] ; CHECK-NEXT: ret i64 [[RES]] ; diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll --- a/llvm/test/Transforms/GVN/PRE/rle.ll +++ b/llvm/test/Transforms/GVN/PRE/rle.ll @@ -914,8 +914,9 @@ ; LE-NEXT: entry: ; LE-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P:%.*]], align 4 ; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 8 -; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; LE-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32 +; LE-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; LE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +; LE-NEXT: [[CONV:%.*]] = zext i8 [[TMP2]] to i32 ; LE-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]] ; LE-NEXT: ret i32 [[ADD]] ; @@ -923,8 +924,9 @@ ; BE-NEXT: entry: ; BE-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P:%.*]], align 4 ; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 16 -; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32 +; BE-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; BE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP2]] to i32 ; BE-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]] ; BE-NEXT: ret i32 [[ADD]] ; @@ -1054,10 +1056,11 @@ ; LE-NEXT: call void @use.i8(i8 [[V_1]]) ; LE-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4 ; LE-NEXT: call void @use.i32(i32 [[V_1_32]]) -; LE-NEXT: [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8 +; LE-NEXT: [[TMP0:%.*]] = freeze i32 [[V_1_32]] +; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 ; LE-NEXT: br label [[LOOP:%.*]] ; LE: loop: -; LE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[V_I_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ] +; LE-NEXT: [[V_I:%.*]] = phi i8 [ 
[[TMP1]], [[ENTRY:%.*]] ], [ [[V_I_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ] ; LE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ] ; LE-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] ; LE-NEXT: call void @use.i8(i8 [[V_I]]) @@ -1081,10 +1084,11 @@ ; BE-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4 ; BE-NEXT: call void @use.i32(i32 [[V_1_32]]) ; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[V_1_32]], 24 -; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 +; BE-NEXT: [[TMP1:%.*]] = freeze i32 [[TMP0]] +; BE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 ; BE-NEXT: br label [[LOOP:%.*]] ; BE: loop: -; BE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[V_I_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ] +; BE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP2]], [[ENTRY:%.*]] ], [ [[V_I_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ] ; BE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ] ; BE-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] ; BE-NEXT: call void @use.i8(i8 [[V_I]]) @@ -1135,10 +1139,11 @@ ; LE-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4 ; LE-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 ; LE-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 8 -; LE-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8 +; LE-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] +; LE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i8 ; LE-NEXT: br label [[BB5:%.*]] ; LE: bb5: -; LE-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], [[BB5]] ], [ [[TMP1]], [[BB:%.*]] ] +; LE-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], [[BB5]] ], [ [[TMP2]], [[BB:%.*]] ] ; LE-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4 ; LE-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]] ; LE-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1 @@ -1151,10 +1156,11 @@ ; BE-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4 ; BE-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 
2 ; BE-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 48 -; BE-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8 +; BE-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] +; BE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i8 ; BE-NEXT: br label [[BB5:%.*]] ; BE: bb5: -; BE-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], [[BB5]] ], [ [[TMP1]], [[BB:%.*]] ] +; BE-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], [[BB5]] ], [ [[TMP2]], [[BB:%.*]] ] ; BE-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4 ; BE-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]] ; BE-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1 diff --git a/llvm/test/Transforms/GVN/metadata.ll b/llvm/test/Transforms/GVN/metadata.ll --- a/llvm/test/Transforms/GVN/metadata.ll +++ b/llvm/test/Transforms/GVN/metadata.ll @@ -255,9 +255,10 @@ ; CHECK-SAME: (ptr [[P:%.*]]) { ; CHECK-NEXT: [[VAL:%.*]] = load ptr, ptr [[P]], align 8 ; CHECK-NEXT: [[VAL_INT:%.*]] = ptrtoint ptr [[VAL]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[VAL_INT]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[VAL_INT]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: call void @use.i64(i64 [[VAL_INT]]) -; CHECK-NEXT: call void @use.i32(i32 [[TMP1]]) +; CHECK-NEXT: call void @use.i32(i32 [[TMP2]]) ; CHECK-NEXT: ret void ; %val = load ptr, ptr %p, align 8, !nonnull !{} @@ -272,9 +273,10 @@ ; CHECK-LABEL: define void @load_i64_range_to_i32_range ; CHECK-SAME: (ptr [[P:%.*]]) { ; CHECK-NEXT: [[VAL:%.*]] = load i64, ptr [[P]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[VAL]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[VAL]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 ; CHECK-NEXT: call void @use.i64(i64 [[VAL]]) -; CHECK-NEXT: call void @use.i32(i32 [[TMP1]]) +; CHECK-NEXT: call void @use.i32(i32 [[TMP2]]) ; CHECK-NEXT: ret void ; %val = load i64, ptr %p, align 8, !range !{i64 0, i64 10}