Index: lib/Target/ARM/ARMConstantIslandPass.cpp
===================================================================
--- lib/Target/ARM/ARMConstantIslandPass.cpp
+++ lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -510,7 +510,6 @@
   const DataLayout &TD = MF->getDataLayout();
   for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
     unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
-    assert(Size >= 4 && "Too small constant pool entry");
     unsigned Align = CPs[i].getAlignment();
     assert(isPowerOf2_32(Align) && "Invalid alignment");
     // Verify that all constant pool entries are a multiple of their alignment.
@@ -820,6 +819,11 @@
       Scale = 4;  // +-(offset_8*4)
       NegOk = true;
       break;
+    case ARM::VLDRH:
+      Bits = 8;
+      Scale = 2;  // +-(offset_8*2)
+      NegOk = true;
+      break;
 
     case ARM::tLDRHi:
       Bits = 5;
@@ -1421,6 +1425,10 @@
   assert(!isThumb || getITInstrPredicate(*MI, PredReg) == ARMCC::AL);
   NewMBB = splitBlockBeforeInstr(&*MI);
+
+  // 4-byte align the next block after the constant pool when the CPE is a
+  // 16-bit value in ARM mode, and 2-byte align it for Thumb.
+  NewMBB->setAlignment(isThumb ? 1 : 2);
 }
 
 /// handleConstantPoolUser - Analyze the specified user, checking to see if it
Index: test/CodeGen/ARM/fp16-litpool.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fp16-litpool.ll
@@ -0,0 +1,102 @@
+; RUN: llc -mtriple=arm-linux-gnueabihf %s -mattr=+fullfp16 -o - | FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -mattr=+fullfp16 -o - | FileCheck %s --check-prefix=CHECK-THUMB
+
+; We want to test two things here:
+; 1) that f16 literals are accepted as litpool entries, and
+; 2) that if the litpool needs to be inserted in the middle of a big
+;    block, the next instruction is 4-byte aligned in ARM mode.
+;
+; These ARM and Thumb test cases have been carefully created with the
+; llvm.arm.space intrinsic to trigger the (edge) cases where a 2-byte CP
+; entry is out of range and is inserted directly after an existing CP entry.
+; This checks that we emit the correct align directive and that the next
+; instruction is not misaligned.
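+;
+; For reference (derived from the ARMConstantIslandPass.cpp hunks above): a
+; VLDRH CP user gets Bits = 8, Scale = 2 and NegOk, i.e. a maximum
+; displacement of ((1 << 8) - 1) * 2 = 510 bytes in either direction, so the
+; llvm.arm.space sizes below are what force a 2-byte entry out of range. The
+; new NewMBB->setAlignment(isThumb ? 1 : 2) (a log2 value: 4 bytes for ARM,
+; 2 bytes for Thumb) is what the ".p2align 2" / ".p2align 1" directives in
+; front of the .LBB labels verify.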
+
+@LL = common dso_local global i64 0, align 8
+
+declare i32 @llvm.arm.space(i32, i32)
+
+; Function Attrs: minsize nounwind optsize
+define dso_local i32 @ARM(i32 %A.coerce) local_unnamed_addr #0 {
+entry:
+  %S = alloca half, align 2
+  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
+  %0 = bitcast i16 %tmp.0.extract.trunc to half
+  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
+  store volatile half 0xH3C00, half* %S, align 2
+  store volatile i64 4242424242424242, i64* @LL, align 8
+
+  call i32 @llvm.arm.space(i32 8920, i32 undef)
+
+  %S.0.S.0.570 = load volatile half, half* %S, align 2
+  %add298 = fadd half %S.0.S.0.570, 0xH2E66
+  store volatile half %add298, half* %S, align 2
+
+  call i32 @llvm.arm.space(i32 1350, i32 undef)
+
+  %3 = bitcast half %add298 to i16
+  %tmp343.0.insert.ext = zext i16 %3 to i32
+  ret i32 %tmp343.0.insert.ext
+
+; CHECK-ARM-LABEL: ARM:
+
+; CHECK-ARM: b .LBB0_{{.}}
+; CHECK-ARM: .p2align 2
+; CHECK-ARM: .LCPI0_{{.}}:
+; CHECK-ARM: .long LL
+; CHECK-ARM: .p2align 2
+; CHECK-ARM: .LCPI0_{{.}}:
+; CHECK-ARM: .long 1576323506
+; CHECK-ARM: .p2align 2
+; CHECK-ARM: .LCPI0_{{.}}:
+; CHECK-ARM: .long 987766
+; CHECK-ARM: .p2align 1
+; CHECK-ARM: .LCPI0_{{.}}:
+; CHECK-ARM: .short 11878
+; CHECK-ARM: .p2align 2
+; CHECK-ARM: .LBB0_{{.}}:
+; CHECK-ARM: .zero 8920
+}
+
+define dso_local i32 @THUMB(i32 %A.coerce) local_unnamed_addr #1 {
+entry:
+  %F = alloca float, align 4
+  %S = alloca half, align 2
+  %tmp.0.extract.trunc = trunc i32 %A.coerce to i16
+  %0 = bitcast i16 %tmp.0.extract.trunc to half
+  %F.0.F.0..sroa_cast = bitcast float* %F to i8*
+  store volatile float 4.200000e+01, float* %F, align 4
+  %S.0.S.0..sroa_cast = bitcast half* %S to i8*
+  store volatile half 0xH3C00, half* %S, align 2
+  %S.0.S.0.142 = load volatile half, half* %S, align 2
+
+  call i32 @llvm.arm.space(i32 1230, i32 undef)
+
+  %add42 = fadd half %S.0.S.0.142, 0xH2E66
+  store volatile half %add42, half* %S, align 2
+
+  call i32 @llvm.arm.space(i32 1330, i32 undef)
+
+  %S.0.S.0.119 = load volatile half, half* %S, align 2
+  %3 = bitcast half %add42 to i16
+  %tmp87.0.insert.ext = zext i16 %3 to i32
+  ret i32 %tmp87.0.insert.ext
+
+; CHECK-THUMB-LABEL: THUMB:
+
+; CHECK-THUMB: b.w .LBB1_{{.}}
+; CHECK-THUMB: .p2align 2
+; CHECK-THUMB: .LCPI{{.*}}:
+; CHECK-THUMB: .long 1109917696
+; CHECK-THUMB: .p2align 1
+; CHECK-THUMB: .LCPI{{.*}}:
+; CHECK-THUMB: .short 11878
+; CHECK-THUMB: .p2align 1
+; CHECK-THUMB: .LBB1_{{.}}:
+; CHECK-THUMB: .zero 1230
+}
+
+attributes #0 = { minsize nounwind optsize "target-features"="+crc,+crypto,+dsp,+fp-armv8,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+strict-align,-thumb-mode" }
+attributes #1 = { minsize nounwind optsize "target-features"="+crc,+crypto,+dsp,+fp-armv8,+fullfp16,+hwdiv,+hwdiv-arm,+neon,+ras,+strict-align,+thumb-mode" }