
Commit 368d52b

Committed Jun 6, 2018
Implement bittest intrinsics generically for non-x86 platforms
I tested these locally on an x86 machine by disabling the inline asm codepath and confirming that the generic code performs the same bitflips as the inline asm. Addresses code review feedback.

llvm-svn: 334059
1 parent 1fd005f commit 368d52b
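
For context, the _bittest* family are MSVC intrinsics: each takes a pointer to a bit array and a bit index, returns the value of the addressed bit, and the non-plain forms also complement, reset, or set that bit, with the _interlocked* variants doing the update atomically. A minimal usage sketch, assuming an MSVC-style environment where intrin.h declares these intrinsics (illustrative code, not taken from this commit):

  #include <intrin.h>

  void Example(long *Flags, long Idx, __int64 *Flags64, __int64 Idx64) {
    unsigned char Was;
    Was = _bittest(Flags, Idx);                          // read bit Idx, no update
    Was = _bittestandcomplement(Flags, Idx);             // read, then flip the bit
    Was = _bittestandset(Flags, Idx);                    // read, then set the bit
    Was = _interlockedbittestandset64(Flags64, Idx64);   // atomic read-and-set
    (void)Was;
  }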

File tree

2 files changed: +247 −42 lines changed

2 files changed

+247
-42
lines changed
 

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 142 additions & 26 deletions
@@ -484,24 +484,61 @@ CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
 }
 
-static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
-                                   char TestAnd, char Size,
-                                   bool Locked = false) {
-  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
-  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+// Get properties of an X86 BT* assembly instruction. The first returned value
+// is the action character code, which can be for complement, reset, or set. The
+// second is the size suffix which our assembler needs. The last is whether to
+// add the lock prefix.
+static std::tuple<char, char, bool>
+getBitTestActionSizeAndLocking(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  case Builtin::BI_bittest:
+    return {'\0', 'l', false};
+  case Builtin::BI_bittestandcomplement:
+    return {'c', 'l', false};
+  case Builtin::BI_bittestandreset:
+    return {'r', 'l', false};
+  case Builtin::BI_bittestandset:
+    return {'s', 'l', false};
+  case Builtin::BI_interlockedbittestandreset:
+    return {'r', 'l', /*Locked=*/true};
+  case Builtin::BI_interlockedbittestandset:
+    return {'s', 'l', /*Locked=*/true};
+
+  case Builtin::BI_bittest64:
+    return {'\0', 'q', false};
+  case Builtin::BI_bittestandcomplement64:
+    return {'c', 'q', false};
+  case Builtin::BI_bittestandreset64:
+    return {'r', 'q', false};
+  case Builtin::BI_bittestandset64:
+    return {'s', 'q', false};
+  case Builtin::BI_interlockedbittestandreset64:
+    return {'r', 'q', /*Locked=*/true};
+  case Builtin::BI_interlockedbittestandset64:
+    return {'s', 'q', /*Locked=*/true};
+  }
+  llvm_unreachable("expected only bittest builtins");
+}
+
+static RValue EmitX86BitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+                                      const CallExpr *E, Value *BitBase,
+                                      Value *BitPos) {
+  char Action, Size;
+  bool Locked;
+  std::tie(Action, Size, Locked) = getBitTestActionSizeAndLocking(BuiltinID);
 
   // Build the assembly.
   SmallString<64> Asm;
   raw_svector_ostream AsmOS(Asm);
   if (Locked)
     AsmOS << "lock ";
   AsmOS << "bt";
-  if (TestAnd)
-    AsmOS << TestAnd;
+  if (Action)
+    AsmOS << Action;
   AsmOS << Size << " $2, ($1)\n\tsetc ${0:b}";
 
   // Build the constraints. FIXME: We should support immediates when possible.
-  std::string Constraints = "=r,r,r,~{cc},~{flags},~{memory},~{fpsr}";
+  std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
   llvm::IntegerType *IntType = llvm::IntegerType::get(
       CGF.getLLVMContext(),
       CGF.getContext().getTypeSize(E->getArg(1)->getType()));
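
To make the asm construction in this hunk concrete, here is a small standalone sketch that rebuilds the same inline-asm template from an (Action, Size, Locked) tuple; the helper name is illustrative and not a Clang API:

  #include <string>

  // Mirrors what EmitX86BitTestIntrinsic streams into AsmOS.
  std::string BuildBitTestAsm(char Action, char Size, bool Locked) {
    std::string Asm;
    if (Locked)
      Asm += "lock ";
    Asm += "bt";
    if (Action)
      Asm += Action;   // 'c' = complement, 'r' = reset, 's' = set; '\0' = plain test
    Asm += Size;       // 'l' for 32-bit operands, 'q' for 64-bit operands
    Asm += " $2, ($1)\n\tsetc ${0:b}";
    return Asm;
  }

For _interlockedbittestandset, for example, the tuple {'s', 'l', true} yields "lock btsl $2, ($1)\n\tsetc ${0:b}", which is exactly the string the updated test below checks for.
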
@@ -515,6 +552,97 @@ static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
   return RValue::get(CS.getInstruction());
 }
 
+/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
+/// bits and a bit position and read and optionally modify the bit at that
+/// position. The position index can be arbitrarily large, i.e. it can be larger
+/// than 31 or 63, so we need an indexed load in the general case.
+static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+                                   const CallExpr *E) {
+  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
+  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+
+  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
+  // indexing operation internally. Use them if possible.
+  llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
+  if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+    return EmitX86BitTestIntrinsic(CGF, BuiltinID, E, BitBase, BitPos);
+
+  // Otherwise, use generic code to load one byte and test the bit. Use all but
+  // the bottom three bits as the array index, and the bottom three bits to form
+  // a mask.
+  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
+  Value *ByteIndex = CGF.Builder.CreateAShr(
+      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
+  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
+  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
+                                                 ByteIndex, "bittest.byteaddr"),
+                   CharUnits::One());
+  Value *PosLow =
+      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
+                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
+
+  // The updating instructions will need a mask.
+  Value *Mask = nullptr;
+  if (BuiltinID != Builtin::BI_bittest && BuiltinID != Builtin::BI_bittest64) {
+    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
+                                 "bittest.mask");
+  }
+
+  // Emit a combined atomicrmw load/store operation for the interlocked
+  // intrinsics.
+  Value *OldByte = nullptr;
+  switch (BuiltinID) {
+  case Builtin::BI_interlockedbittestandreset:
+  case Builtin::BI_interlockedbittestandreset64:
+    OldByte = CGF.Builder.CreateAtomicRMW(
+        AtomicRMWInst::And, ByteAddr.getPointer(), CGF.Builder.CreateNot(Mask),
+        llvm::AtomicOrdering::SequentiallyConsistent);
+    break;
+  case Builtin::BI_interlockedbittestandset:
+  case Builtin::BI_interlockedbittestandset64:
+    OldByte = CGF.Builder.CreateAtomicRMW(
+        AtomicRMWInst::Or, ByteAddr.getPointer(), Mask,
+        llvm::AtomicOrdering::SequentiallyConsistent);
+    break;
+  default:
+    break;
+  }
+
+  // Emit a plain load for the non-interlocked intrinsics.
+  if (!OldByte) {
+    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
+    Value *NewByte = nullptr;
+    switch (BuiltinID) {
+    case Builtin::BI_bittest:
+    case Builtin::BI_bittest64:
+      // Don't store anything.
+      break;
+    case Builtin::BI_bittestandcomplement:
+    case Builtin::BI_bittestandcomplement64:
+      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
+      break;
+    case Builtin::BI_bittestandreset:
+    case Builtin::BI_bittestandreset64:
+      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
+      break;
+    case Builtin::BI_bittestandset:
+    case Builtin::BI_bittestandset64:
+      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
+      break;
+    default:
+      llvm_unreachable("non bittest family builtin");
+    }
+    if (NewByte)
+      CGF.Builder.CreateStore(NewByte, ByteAddr);
+  }
+
+  // However we loaded the old byte, either by plain load or atomicrmw, shift
+  // the bit into the low position and mask it to 0 or 1.
+  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
+  return RValue::get(CGF.Builder.CreateAnd(
+      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res"));
+}
+
 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
 // handle them here.
 enum class CodeGenFunction::MSVCIntrin {
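
Read as ordinary code rather than IRBuilder calls, the generic lowering added in this hunk behaves roughly like the sketch below; the function and enum names are made up for illustration, and the GCC/Clang __atomic_* builtins stand in for the atomicrmw instructions the real code emits:

  #include <cstdint>

  enum class BitTestKind { Test, Complement, Reset, Set, InterlockedReset, InterlockedSet };

  // Hedged model of the non-x86 codepath: index a byte, then test/update one bit.
  uint8_t GenericBitTest(uint8_t *BitBase, int64_t BitPos, BitTestKind Kind) {
    uint8_t *ByteAddr = BitBase + (BitPos >> 3);   // bittest.byteaddr
    uint8_t PosLow = uint8_t(BitPos) & 0x7;
    uint8_t Mask = uint8_t(1u << PosLow);          // bittest.mask
    uint8_t OldByte;
    switch (Kind) {
    case BitTestKind::InterlockedReset:            // atomicrmw and, seq_cst
      OldByte = __atomic_fetch_and(ByteAddr, uint8_t(~Mask), __ATOMIC_SEQ_CST);
      break;
    case BitTestKind::InterlockedSet:              // atomicrmw or, seq_cst
      OldByte = __atomic_fetch_or(ByteAddr, Mask, __ATOMIC_SEQ_CST);
      break;
    default:                                       // plain load plus optional store
      OldByte = *ByteAddr;
      if (Kind == BitTestKind::Complement)
        *ByteAddr = uint8_t(OldByte ^ Mask);
      else if (Kind == BitTestKind::Reset)
        *ByteAddr = uint8_t(OldByte & ~Mask);
      else if (Kind == BitTestKind::Set)
        *ByteAddr = uint8_t(OldByte | Mask);
      break;
    }
    return (OldByte >> PosLow) & 1;                // bittest.shr / bittest.res
  }

The interlocked cases correspond to the CreateAtomicRMW calls above, and the final shift-and-mask mirrors the bittest.shr / bittest.res values that the ARM checks in the updated test look for.
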
@@ -2806,31 +2934,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
   case Builtin::BI_InterlockedXor:
     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
 
+  case Builtin::BI_bittest64:
   case Builtin::BI_bittest:
-    return EmitBitTestIntrinsic(*this, E, '\0', 'l');
+  case Builtin::BI_bittestandcomplement64:
   case Builtin::BI_bittestandcomplement:
-    return EmitBitTestIntrinsic(*this, E, 'c', 'l');
+  case Builtin::BI_bittestandreset64:
   case Builtin::BI_bittestandreset:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'l');
+  case Builtin::BI_bittestandset64:
   case Builtin::BI_bittestandset:
-    return EmitBitTestIntrinsic(*this, E, 's', 'l');
   case Builtin::BI_interlockedbittestandreset:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'l', /*Locked=*/true);
-  case Builtin::BI_interlockedbittestandset:
-    return EmitBitTestIntrinsic(*this, E, 's', 'l', /*Locked=*/true);
-
-  case Builtin::BI_bittest64:
-    return EmitBitTestIntrinsic(*this, E, '\0', 'q');
-  case Builtin::BI_bittestandcomplement64:
-    return EmitBitTestIntrinsic(*this, E, 'c', 'q');
-  case Builtin::BI_bittestandreset64:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'q');
-  case Builtin::BI_bittestandset64:
-    return EmitBitTestIntrinsic(*this, E, 's', 'q');
   case Builtin::BI_interlockedbittestandreset64:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'q', /*Locked=*/true);
   case Builtin::BI_interlockedbittestandset64:
-    return EmitBitTestIntrinsic(*this, E, 's', 'q', /*Locked=*/true);
+  case Builtin::BI_interlockedbittestandset:
+    return EmitBitTestIntrinsic(*this, BuiltinID, E);
 
   case Builtin::BI__exception_code:
   case Builtin::BI_exception_code:

clang/test/CodeGen/bittest-intrin.c

Lines changed: 105 additions & 16 deletions
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 -fms-extensions -triple thumbv7-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=ARM
+// RUN: %clang_cc1 -fms-extensions -triple aarch64-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=ARM
 
 volatile unsigned char sink = 0;
 void test32(long *base, long idx) {
@@ -18,18 +20,105 @@ void test64(__int64 *base, __int64 idx) {
   sink = _interlockedbittestandset64(base, idx);
 }
 
-// CHECK-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
-// CHECK: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-
-// CHECK-LABEL: define dso_local void @test64(i64* %base, i64 %idx)
-// CHECK: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
+// X64: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+
+// X64-LABEL: define dso_local void @test64(i64* %base, i64 %idx)
+// X64: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+
+// ARM-LABEL: define dso_local {{.*}}void @test32(i32* %base, i32 %idx)
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NEWBYTE:[^ ]*]] = xor i8 %[[BYTE]], %[[MASK]]
+// ARM store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
+// ARM: %[[NEWBYTE:[^ ]*]] = and i8 %[[BYTE]], %[[NOTMASK]]
+// ARM store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NEWBYTE:[^ ]*]] = or i8 %[[BYTE]], %[[MASK]]
+// ARM store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw and i8* %[[BYTEADDR]], i8 %[[NOTMASK]] seq_cst
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw or i8* %[[BYTEADDR]], i8 %[[MASK]] seq_cst
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM-LABEL: define dso_local {{.*}}void @test64(i64* %base, i64 %idx)
+// ARM: %[[IDXHI:[^ ]*]] = ashr i64 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i64* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i64 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i64 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ... the rest is the same, but with i64 instead of i32.
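
The ARM check sequences above are consistent with test bodies that call each intrinsic in turn and store the result to the volatile sink, in the same order as the X64 checks (test, complement, reset, set, interlocked reset, interlocked set). Based on the visible context lines, test32 presumably reads roughly as follows; this is a reconstruction for readability, not the verbatim test source:

  void test32(long *base, long idx) {
    sink = _bittest(base, idx);
    sink = _bittestandcomplement(base, idx);
    sink = _bittestandreset(base, idx);
    sink = _bittestandset(base, idx);
    sink = _interlockedbittestandreset(base, idx);
    sink = _interlockedbittestandset(base, idx);
  }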
