Index: lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -77,8 +77,8 @@
       return 1;
     return ST->hasV6T2Ops() ? 2 : 3;
   }
-  // Thumb1.
-  if (SImmVal >= 0 && SImmVal < 256)
+  // Thumb1: any i8 immediate costs 1, as does any value in the range [0, 255].
+  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
     return 1;
   if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
     return 2;
Index: test/CodeGen/Thumb/consthoist-imm8-costs-1.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/consthoist-imm8-costs-1.ll
@@ -0,0 +1,62 @@
+; RUN: llc %s -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-none-unknown-musleabi"
+
+%struct.foo = type { i8, i8, i8, i8, i32, i16, i8 } ; the stores below write the first four i8 fields
+
+@global.3 = dso_local local_unnamed_addr global [10 x %struct.foo] zeroinitializer, align 4
+
+; Verify that store-merging combines the four adjacent i8 stores into a single 32-bit str
+; rather than strb+strb+strh, i.e. that the i8 constant -1 is not moved by constant-hoisting.
+; CHECK: movs [[R1:r[0-9]+]], #255
+; CHECK: lsls [[R2:r[0-9]+]], [[R1]], #16
+; CHECK-NEXT: str [[R2]]
+; CHECK: lsls [[R3:r[0-9]+]], [[R1]], #16
+; CHECK-NEXT: str [[R3]]
+; CHECK-NOT: strh
+; CHECK-NOT: strb
+; Function Attrs: norecurse nounwind optsize ssp writeonly
+define dso_local void @ham() #0 {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %tmp = phi i32 [ 0, %bb ], [ %tmp6, %bb1 ]
+  %tmp2 = getelementptr inbounds [10 x %struct.foo], [10 x %struct.foo]* @global.3, i32 0, i32 %tmp, i32 1
+  store i8 0, i8* %tmp2, align 1, !tbaa !3
+  %tmp3 = getelementptr inbounds [10 x %struct.foo], [10 x %struct.foo]* @global.3, i32 0, i32 %tmp, i32 0
+  store i8 0, i8* %tmp3, align 4, !tbaa !9
+  %tmp4 = getelementptr inbounds [10 x %struct.foo], [10 x %struct.foo]* @global.3, i32 0, i32 %tmp, i32 2
+  store i8 -1, i8* %tmp4, align 2, !tbaa !10 ; the i8 -1 constant this test is about (in-loop use)
+  %tmp5 = getelementptr inbounds [10 x %struct.foo], [10 x %struct.foo]* @global.3, i32 0, i32 %tmp, i32 3
+  store i8 0, i8* %tmp5, align 1, !tbaa !11
+  %tmp6 = add nuw nsw i32 %tmp, 1
+  %tmp7 = icmp eq i32 %tmp6, 8
+  br i1 %tmp7, label %bb8, label %bb1
+
+bb8:                                              ; preds = %bb1
+  store i8 0, i8* getelementptr inbounds ([10 x %struct.foo], [10 x %struct.foo]* @global.3, i32 0, i32 8, i32 1), align 1, !tbaa !3
+  store i8 0, i8* getelementptr inbounds ([10 x %struct.foo], [10 x %struct.foo]* @global.3, i32 0, i32 8, i32 0), align 4, !tbaa !9
+  store i8 -1, i8* getelementptr inbounds ([10 x %struct.foo], [10 x %struct.foo]* @global.3, i32 0, i32 8, i32 2), align 2, !tbaa !10 ; second use of the same i8 -1 constant (after the loop)
+  store i8 0, i8* getelementptr inbounds ([10 x %struct.foo], [10 x %struct.foo]* @global.3, i32 0, i32 8, i32 3), align 1, !tbaa !11
+  ret void
+}
+
+attributes #0 = { norecurse nounwind optsize ssp writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m0" "target-features"="+armv6-m,+strict-align,+thumb-mode,-crc,-dotprod,-dsp,-fp16fml,-hwdiv,-hwdiv-arm,-ras" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 1}
+!2 = !{!"Snapdragon LLVM ARM Compiler 8.0.0 (based on LLVM 8.0.0)"}
+!3 = !{!4, !5, i64 1}
+!4 = !{!"", !5, i64 0, !5, i64 1, !5, i64 2, !5, i64 3, !7, i64 4, !8, i64 8, !5, i64 10}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!"long", !5, i64 0}
+!8 = !{!"short", !5, i64 0}
+!9 = !{!4, !5, i64 0}
+!10 = !{!4, !5, i64 2}
+!11 = !{!4, !5, i64 3}