Index: llvm/test/CodeGen/AArch64/bitfield-insert.ll
===================================================================
--- llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -285,6 +285,117 @@
   ret i32 %shl.4
 }
 
+; Optimal codegen is to use bfi, which simplifies away two instructions (%mask and %bit-field-pos-op).
+; As a comparison, 'test_orr_not_bfi_i64' shows when orr is better than bfi.
+define i64 @test_bfi_not_orr_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: test_bfi_not_orr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x1, #0xff
+; CHECK-NEXT:    bfi x8, x0, #8, #8
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
+  %bfi_dst = and i64 %1, 255
+  %mask = and i64 %0, 255
+  %bit-field-pos-op = shl i64 %mask, 8
+  %or_res = or i64 %bit-field-pos-op, %bfi_dst
+  ret i64 %or_res
+}
+
+; Optimal codegen should fold the left shift (%3) into orr.
+define i64 @test_orr_not_bfi_i64(i64 %0) {
+; CHECK-LABEL: test_orr_not_bfi_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xff
+; CHECK-NEXT:    bfi x8, x0, #8, #8
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
+  %2 = and i64 %0, 255
+  %3 = shl i64 %2, 8
+  %4 = or i64 %2, %3
+  ret i64 %4
+}
+
+; bfi is better than orr, since it would simplify away two instructions (%mask and %bit-field-pos-op).
+define i32 @test_bfi_not_orr_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: test_bfi_not_orr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    bfi w8, w0, #8, #8
+; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    ret
+  %bfi_dst = and i32 %1, 255
+  %mask = and i32 %0, 255
+  %bit-field-pos-op = shl i32 %mask, 8
+  %or_res = or i32 %bit-field-pos-op, %bfi_dst
+  ret i32 %or_res
+}
+
+; orr is better than bfi, since either one simplifies away one instruction (%3), but orr has lower latency and higher throughput.
+define i32 @test_orr_not_bfi_i32(i32 %0) {
+; CHECK-LABEL: test_orr_not_bfi_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    bfi w8, w0, #8, #8
+; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    ret
+  %2 = and i32 %0, 255
+  %3 = shl i32 %2, 8
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+
+define i64 @test_bfxil_not_orr_i64(i64 %0, i64 %1) {
+; CHECK-LABEL: test_bfxil_not_orr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0xff000
+; CHECK-NEXT:    bfxil x0, x1, #12, #8
+; CHECK-NEXT:    ret
+  %shifted-mask = and i64 %1, 1044480
+  %bfi-dst = and i64 %0, 1044480
+  %bit-field-pos-op = lshr i64 %shifted-mask, 12
+  %or_res = or i64 %bit-field-pos-op, %bfi-dst
+  ret i64 %or_res
+}
+
+define i64 @test_orr_not_bfxil_i64(i64 %0) {
+; CHECK-LABEL: test_orr_not_bfxil_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xff000
+; CHECK-NEXT:    bfxil x8, x0, #12, #8
+; CHECK-NEXT:    mov x0, x8
+; CHECK-NEXT:    ret
+  %2 = and i64 %0, 1044480 ; 0xff000
+  %3 = lshr i64 %2, 12
+  %4 = or i64 %2, %3
+  ret i64 %4
+}
+
+define i32 @test_bfxil_not_orr_i32(i32 %0, i32 %1) {
+; CHECK-LABEL: test_bfxil_not_orr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w0, w0, #0xff000
+; CHECK-NEXT:    bfxil w0, w1, #12, #8
+; CHECK-NEXT:    ret
+  %shifted-mask = and i32 %1, 1044480
+  %bfi-dst = and i32 %0, 1044480
+  %bit-field-pos-op = lshr i32 %shifted-mask, 12
+  %or_res = or i32 %bit-field-pos-op, %bfi-dst
+  ret i32 %or_res
+}
+
+define i32 @test_orr_not_bfxil_i32(i32 %0) {
+; CHECK-LABEL: test_orr_not_bfxil_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w0, #0xff000
+; CHECK-NEXT:    bfxil w8, w0, #12, #8
+; CHECK-NEXT:    mov w0, w8
+; CHECK-NEXT:    ret
+  %2 = and i32 %0, 1044480 ; 0xff000
+  %3 = lshr i32 %2, 12
+  %4 = or i32 %2, %3
+  ret i32 %4
+}
+
 define void @test_nouseful_strb(i32* %ptr32, i8* %ptr8, i32 %x) {
 ; CHECK-LABEL: test_nouseful_strb:
 ; CHECK:       // %bb.0: // %entry