Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1120,6 +1120,16 @@
   if (!MI.getOperand(1).isReg())
     return false;
 
+  auto NormalizeCmpValue = [](int64_t Value) -> int {
+    // Comparison immediates may be 64-bit, but CmpValue is only an int.
+    // Normalize to 0/1/2 return value, where 2 indicates any value apart from
+    // 0 or 1.
+    // TODO: Switch CmpValue to int64_t in the API to avoid this.
+    if (Value == 0 || Value == 1)
+      return Value;
+    return 2;
+  };
+
   switch (MI.getOpcode()) {
   default:
     break;
@@ -1155,8 +1165,7 @@
     SrcReg = MI.getOperand(1).getReg();
     SrcReg2 = 0;
     CmpMask = ~0;
-    // FIXME: In order to convert CmpValue to 0 or 1
-    CmpValue = MI.getOperand(2).getImm() != 0;
+    CmpValue = NormalizeCmpValue(MI.getOperand(2).getImm());
     return true;
   case AArch64::ANDSWri:
   case AArch64::ANDSXri:
@@ -1165,14 +1174,9 @@
     SrcReg = MI.getOperand(1).getReg();
     SrcReg2 = 0;
     CmpMask = ~0;
-    // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
-    // while the type of CmpValue is int. When converting uint64_t to int,
-    // the high 32 bits of uint64_t will be lost.
-    // In fact it causes a bug in spec2006-483.xalancbmk
-    // CmpValue is only used to compare with zero in OptimizeCompareInstr
-    CmpValue = AArch64_AM::decodeLogicalImmediate(
+    CmpValue = NormalizeCmpValue(AArch64_AM::decodeLogicalImmediate(
                    MI.getOperand(2).getImm(),
-                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+                   MI.getOpcode() == AArch64::ANDSWri ? 32 : 64));
     return true;
   }
 
@@ -1462,10 +1466,9 @@
   if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
     return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
 
-  // Continue only if we have a "ri" where immediate is zero.
-  // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
-  // function.
-  assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
+  // Warning: CmpValue == 2 indicates *any* value apart from 0 or 1.
+  assert((CmpValue == 0 || CmpValue == 1 || CmpValue == 2) &&
+         "CmpValue must be 0, 1, or 2!");
   if (SrcReg2 != 0)
     return false;
 
@@ -1473,9 +1476,10 @@
   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
     return false;
 
-  if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+  if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
     return true;
-  return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+  return (CmpValue == 0 || CmpValue == 1) &&
+         removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
 }
 
 /// Get opcode of S version of Instr.
Index: llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
===================================================================
--- llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
+++ llvm/test/CodeGen/AArch64/csinc-cmp-removal.mir
@@ -307,3 +307,42 @@
     RET_ReallyLR
 
 ...
+---
+name: subswr_wrong_cmp_value
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: subswr_wrong_cmp_value
+  ; CHECK: bb.0:
+  ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK: liveins: $x1
+  ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x1
+  ; CHECK: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
+  ; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr killed [[DEF]], [[COPY]], implicit-def $nzcv
+  ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+  ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri killed [[CSINCWr]], 3, 0, implicit-def $nzcv
+  ; CHECK: Bcc 1, %bb.2, implicit $nzcv
+  ; CHECK: B %bb.1
+  ; CHECK: bb.1:
+  ; CHECK: successors: %bb.2(0x80000000)
+  ; CHECK: B %bb.2
+  ; CHECK: bb.2:
+  ; CHECK: RET_ReallyLR
+  bb.0:
+    liveins: $x1
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    %1:gpr64common = COPY $x1
+    %2:gpr64 = IMPLICIT_DEF
+    %3:gpr64 = SUBSXrr killed %2:gpr64, %1:gpr64common, implicit-def $nzcv
+    %4:gpr32common = CSINCWr $wzr, $wzr, 1, implicit $nzcv
+    %5:gpr32 = SUBSWri killed %4:gpr32common, 3, 0, implicit-def $nzcv
+    Bcc 1, %bb.2, implicit $nzcv
+    B %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+    B %bb.2
+
+  bb.2:
+    RET_ReallyLR
+
+...
Index: llvm/test/CodeGen/AArch64/pr51476.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/pr51476.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @test(i8 %arg) nounwind {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    cmp w8, #1
+; CHECK-NEXT:    cset w0, ne
+; CHECK-NEXT:    cmp w0, #3
+; CHECK-NEXT:    strb w0, [sp, #12]
+; CHECK-NEXT:    b.eq .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %do_call
+; CHECK-NEXT:    bl unknown
+; CHECK-NEXT:  .LBB0_2: // %common.ret
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %tmp = alloca i8
+  %cmp1 = icmp ne i8 %arg, 1
+  %zext = zext i1 %cmp1 to i8
+  store i8 %zext, i8* %tmp
+  %zext2 = load i8, i8* %tmp
+  %cmp2 = icmp eq i8 %zext2, 3
+  br i1 %cmp2, label %exit, label %do_call
+
+do_call:
+  call void @unknown(i8 %zext2)
+  ret void
+
+exit:
+  ret void
+}
+
+declare void @unknown(i8)
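
For reference, a minimal standalone sketch of the normalization this patch introduces (the main driver and its test values below are illustrative, not part of the patch). The old code collapsed the comparison immediate with "!= 0", so any nonzero immediate, such as the 3 in the SUBSWri test above, became indistinguishable from 1, and removeCmpToZeroOrOne could fire on a compare that is not actually against 0 or 1. Mapping the immediate onto {0, 1, 2} keeps the two optimizable cases exact while still fitting CmpValue's int type:

#include <cassert>
#include <cstdint>

// Same logic as the NormalizeCmpValue lambda added in analyzeCompare:
// 0 and 1 pass through exactly; everything else collapses to the
// sentinel 2, meaning "some value other than 0 or 1".
static int NormalizeCmpValue(int64_t Value) {
  if (Value == 0 || Value == 1)
    return static_cast<int>(Value);
  return 2;
}

int main() {
  // The old behavior was `Imm != 0`, which mapped 3 to 1 and let
  // removeCmpToZeroOrOne treat a compare-with-3 as a compare-with-1.
  assert(NormalizeCmpValue(0) == 0);
  assert(NormalizeCmpValue(1) == 1);
  assert(NormalizeCmpValue(3) == 2); // no longer looks like 1
  // A wide immediate no longer truncates: high bits are not silently lost.
  assert(NormalizeCmpValue(INT64_C(0x100000000)) == 2);
  return 0;
}

Returning a sentinel rather than widening CmpValue keeps the analyzeCompare signature unchanged; the TODO in the patch notes that switching CmpValue to int64_t in the API would remove the need for this normalization entirely.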