Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -415,12 +415,14 @@
 // Any instruction that defines a 32-bit result zeros out the high half of the
 // register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
 // be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits.
+// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
+// 32 bits; they're probably just qualifying a CopyFromReg.
 // FIXME: X86 also checks for CMOV here. Do we need something similar?
 static inline bool isDef32(const SDNode &N) {
   unsigned Opc = N.getOpcode();
   return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
-         Opc != ISD::CopyFromReg;
+         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
+         Opc != ISD::AssertZext;
 }
 
 } // end anonymous namespace
Index: llvm/test/CodeGen/AArch64/arm64-assert-zext-sext.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/arm64-assert-zext-sext.ll
@@ -0,0 +1,101 @@
+; RUN: llc -O2 -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+
+%struct.a = type { i32 }
+
+@a = internal unnamed_addr global i1 false, align 8
+@b = common local_unnamed_addr global i32 0, align 4
+@e = common local_unnamed_addr global i32 0, align 4
+@d = common local_unnamed_addr global i32 0, align 4
+@.str = private unnamed_addr constant [6 x i8] c"%llu\0A\00", align 1
+@g = global i32 193, align 4
+@f = local_unnamed_addr global i8* bitcast (i32* @g to i8*), align 8
+@j = common global i32 0, align 4
+@k = global i16* bitcast (i32* @j to i16*), align 8
+@n = local_unnamed_addr global i32** bitcast (i16** @k to i32**), align 8
+@m = common local_unnamed_addr global %struct.a zeroinitializer, align 4
+
+declare i32 @printf(i8* nocapture readonly, ...) local_unnamed_addr
+
+define i32 @assertzext() local_unnamed_addr {
+entry:
+  %.b = load i1, i1* @a, align 8
+  %i = select i1 %.b, i64 0, i64 66296709418
+  store i1 true, i1* @a, align 8
+  %conv.i = trunc i64 %i to i32
+  %.pr.i = load i32, i32* @b, align 4
+  %cmp11.i = icmp eq i32 %.pr.i, 0
+  br i1 %cmp11.i, label %end, label %for.body.lr.ph.i
+
+for.body.lr.ph.i:                                 ; preds = %entry
+  %i1 = load i32, i32* @e, align 4
+  %sext.i = and i64 %i, 1872199978
+  %inc.i.peel = add nsw i32 %.pr.i, 1
+  %cmp.i.peel = icmp eq i32 %inc.i.peel, 0
+  br i1 %cmp.i.peel, label %for.cond.for.end_crit_edge.i, label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i, %for.body.lr.ph.i
+  %i2 = phi i32 [ %inc.i, %for.body.i ], [ %inc.i.peel, %for.body.lr.ph.i ]
+  %inc.i = add nsw i32 %i2, 1
+  %cmp.i = icmp eq i32 %inc.i, 0
+  br i1 %cmp.i, label %for.cond.for.end_crit_edge.i, label %for.body.i
+
+for.cond.for.end_crit_edge.i:                     ; preds = %for.body.i, %for.body.lr.ph.i
+  %n.012.i.lcssa = phi i64 [ %sext.i, %for.body.lr.ph.i ], [ 0, %for.body.i ]
+  %conv2.i = sext i32 %i1 to i64
+  %i3 = inttoptr i64 %conv2.i to i64*
+  store i64 %n.012.i.lcssa, i64* %i3, align 8
+  store i32 0, i32* @b, align 4
+  br label %end
+
+end:                                              ; preds = %for.cond.for.end_crit_edge.i, %entry
+  %n.0.lcssa.i = phi i32 [ 0, %for.cond.for.end_crit_edge.i ], [ %conv.i, %entry ]
+  %call.i = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %n.0.lcssa.i)
+  %conv4.i = sext i32 %n.0.lcssa.i to i64
+  %call5.i = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i64 %conv4.i)
+  ret i32 0
+; CHECK-LABEL: end
+; CHECK: mov w{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: bl printf
+; CHECK: mov w{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: bl printf
+}
+
+declare i64 @test(i64)
+
+define i32 @assertsext() {
+entry:
+  %i = load i8*, i8** @f, align 8
+  %i1 = load i8, i8* %i, align 1
+  %conv.i.i = sext i8 %i1 to i32
+  %i2 = load i32, i32* getelementptr inbounds (%struct.a, %struct.a* @m, i64 0, i32 0), align 4
+  %mul = mul nsw i32 %i2, %conv.i.i
+  %xor = xor i32 %mul, 3
+  %tobool = icmp eq i32 %xor, 0
+  br i1 %tobool, label %t.exit, label %land.end
+
+land.end:                                         ; preds = %entry
+  %conv1 = zext i32 %conv.i.i to i64
+  %conv.i.i.i = sext i32 %xor to i64
+  %div.i7 = udiv i64 2036854775807, %conv1
+  %cmp = icmp slt i64 %div.i7, %conv.i.i.i
+  %spec.select = select i1 %cmp, i32 1, i32 %conv.i.i
+  br label %t.exit
+; CHECK-LABEL: land.end
+; CHECK: mov w{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: udiv x{{[0-9]+}}, x{{[0-9]+}}, x{{[0-9]+}}
+
+t.exit:                                           ; preds = %land.end, %entry
+  %i3 = phi i32 [ %conv.i.i, %entry ], [ %spec.select, %land.end ]
+  %conv3.i.i.i = trunc i32 %i3 to i16
+  %i4 = load i16*, i16** @k, align 8
+  store i16 %conv3.i.i.i, i16* %i4, align 2
+  %i5 = load i32**, i32*** @n, align 8
+  %i6 = load i32*, i32** %i5, align 8
+  %i7 = load i32, i32* %i6, align 4
+  %xor.i.i = xor i32 %i7, 8
+  store i32 %xor.i.i, i32* %i6, align 4
+  %call2.i = tail call i64 bitcast (i64 (i64)* @test to i64 (i32)*)(i32 undef)
+  %i8 = load i32, i32* @j, align 4
+  %call1 = tail call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %i8)
+  ret i32 0
+}
Index: llvm/test/CodeGen/AArch64/shift_minsize.ll
===================================================================
--- llvm/test/CodeGen/AArch64/shift_minsize.ll
+++ llvm/test/CodeGen/AArch64/shift_minsize.ll
@@ -59,7 +59,7 @@
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    mov w2, w2
 ; CHECK-NEXT:    bl __ashlti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -86,7 +86,7 @@
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    mov w2, w2
 ; CHECK-NEXT:    bl __ashrti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -112,7 +112,7 @@
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
+; CHECK-NEXT:    mov w2, w2
 ; CHECK-NEXT:    bl __lshrti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
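
Note (commentary, not part of the patch): the failure mode is easiest to see with an ABI-extended argument. A signext i32 argument reaches the DAG as AssertSext(CopyFromReg), and before this change isDef32 treated that AssertSext as a 32-bit def whose high half is zero, so a later (i64 (zext ...)) could be folded away with no extending instruction emitted. A minimal sketch, assuming a hypothetical function and the same codegen the shift_minsize.ll updates check for (the explicit "mov w2, w2"):

; If %x is negative, bits 32-63 of the incoming register hold copies of the
; sign bit, so the zext must emit an explicit "mov w0, w0" (a 32-bit move,
; which implicitly zeroes the high half) rather than assuming those bits are
; already zero.
define i64 @zext_of_signext(i32 signext %x) {
  %e = zext i32 %x to i64
  ret i64 %e
}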