Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -69,15 +69,14 @@ // We want to use these instructions, and using fp32 denormals also causes // instructions to run at the double precision rate for the device so it's // probably best to just report no single precision denormals. -static uint32_t getFPMode(const MachineFunction &F) { - const GCNSubtarget& ST = F.getSubtarget(); - // TODO: Is there any real use for the flush in only / flush out only modes? +static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) { + // TODO: Is there any real use for the flush in only / flush out only modes? uint32_t FP32Denormals = - ST.hasFP32Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; + Mode.FP32Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; uint32_t FP64Denormals = - ST.hasFP64Denormals() ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; + Mode.FP64FP16Denormals ? FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) | FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) | @@ -1026,11 +1025,12 @@ ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks( &STM, ProgInfo.NumVGPRsForWavesPerEU); + const SIModeRegisterDefaults Mode = MFI->getMode(); + // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode // register. - ProgInfo.FloatMode = getFPMode(MF); + ProgInfo.FloatMode = getFPMode(Mode); - const SIModeRegisterDefaults Mode = MFI->getMode(); ProgInfo.IEEEMode = Mode.IEEE; // Make clamp modifier on NaN input returns 0. 
Index: llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -69,6 +69,7 @@ Module *Mod = nullptr; const DataLayout *DL = nullptr; bool HasUnsafeFPMath = false; + bool HasFP32Denormals = false; /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to /// binary operation \p V. @@ -574,7 +575,6 @@ Value *NewFDiv = nullptr; - bool HasDenormals = ST->hasFP32Denormals(); if (VectorType *VT = dyn_cast(Ty)) { NewFDiv = UndefValue::get(VT); @@ -585,7 +585,7 @@ Value *DenEltI = Builder.CreateExtractElement(Den, I); Value *NewElt; - if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasDenormals)) { + if (shouldKeepFDivF32(NumEltI, UnsafeDiv, HasFP32Denormals)) { NewElt = Builder.CreateFDiv(NumEltI, DenEltI); } else { NewElt = Builder.CreateCall(Decl, { NumEltI, DenEltI }); @@ -594,7 +594,7 @@ NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I); } } else { - if (!shouldKeepFDivF32(Num, UnsafeDiv, HasDenormals)) + if (!shouldKeepFDivF32(Num, UnsafeDiv, HasFP32Denormals)) NewFDiv = Builder.CreateCall(Decl, { Num, Den }); } @@ -1033,6 +1033,7 @@ AC = &getAnalysis().getAssumptionCache(F); DA = &getAnalysis(); HasUnsafeFPMath = hasUnsafeFPMath(F); + HasFP32Denormals = ST->hasFP32Denormals(F); bool MadeChange = false; Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -127,6 +127,10 @@ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can // make the right decision when generating code for different targets. const GCNSubtarget *Subtarget; + + // Default FP mode for the current function. 
+ AMDGPU::SIModeRegisterDefaults Mode; + bool EnableLateStructurizeCFG; public: @@ -392,6 +396,7 @@ } #endif Subtarget = &MF.getSubtarget(); + Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction(), *Subtarget); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -2103,7 +2108,7 @@ bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods); bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods); - assert((IsFMA || !Subtarget->hasFP32Denormals()) && + assert((IsFMA || !Mode.FP32Denormals) && "fmad selected with denormals enabled"); // TODO: We can select this with f32 denormals enabled if all the sources are // converted from f16 (in which case fmad isn't legal). Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1584,8 +1584,11 @@ // float fqneg = -fq; SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq); + MachineFunction &MF = DAG.getMachineFunction(); + const AMDGPUMachineFunction *MFI = MF.getInfo(); + // float fr = mad(fqneg, fb, fa); - unsigned OpCode = Subtarget->hasFP32Denormals() ? + unsigned OpCode = MFI->getMode().FP32Denormals ? (unsigned)AMDGPUISD::FMAD_FTZ : (unsigned)ISD::FMAD; SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa); @@ -1666,8 +1669,11 @@ } if (isTypeLegal(MVT::i64)) { + MachineFunction &MF = DAG.getMachineFunction(); + const SIMachineFunctionInfo *MFI = MF.getInfo(); + // Compute denominator reciprocal. - unsigned FMAD = Subtarget->hasFP32Denormals() ? + unsigned FMAD = MFI->getMode().FP32Denormals ? 
(unsigned)AMDGPUISD::FMAD_FTZ : (unsigned)ISD::FMAD; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -92,13 +92,16 @@ class AMDGPUPat : Pat, PredicateControl; -def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">; -def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">; -def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">; -def NoFP16Denormals : Predicate<"!Subtarget->hasFP16Denormals()">; -def NoFP32Denormals : Predicate<"!Subtarget->hasFP32Denormals()">; -def NoFP64Denormals : Predicate<"!Subtarget->hasFP64Denormals()">; +let RecomputePerFunction = 1 in { +def FP16Denormals : Predicate<"MF->getInfo()->getMode().FP64FP16Denormals">; +def FP32Denormals : Predicate<"MF->getInfo()->getMode().FP32Denormals">; +def FP64Denormals : Predicate<"MF->getInfo()->getMode().FP64FP16Denormals">; +def NoFP16Denormals : Predicate<"!MF->getInfo()->getMode().FP64FP16Denormals">; +def NoFP32Denormals : Predicate<"!MF->getInfo()->getMode().FP32Denormals">; +def NoFP64Denormals : Predicate<"!MF->getInfo()->getMode().FP64FP16Denormals">; def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">; +} + def FMA : Predicate<"Subtarget->hasFMA()">; def InstFlag : OperandWithDefaultOps ; Index: llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -11,6 +11,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" +#include "Utils/AMDGPUBaseInfo.h" namespace llvm { @@ -28,6 +29,9 @@ /// Number of bytes in the LDS that are being used. unsigned LDSSize; + // State of MODE register, assumed FP mode. + AMDGPU::SIModeRegisterDefaults Mode; + // Kernels + shaders. i.e. 
functions called by the driver and not called // by other functions. bool IsEntryFunction; @@ -53,6 +57,10 @@ return LDSSize; } + AMDGPU::SIModeRegisterDefaults getMode() const { + return Mode; + } + bool isEntryFunction() const { return IsEntryFunction; } Index: llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -18,6 +18,7 @@ LocalMemoryObjects(), ExplicitKernArgSize(0), LDSSize(0), + Mode(MF.getFunction(), MF.getSubtarget()), IsEntryFunction(AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv())), NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath), MemoryBound(false), Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -148,7 +148,12 @@ return HasMadMixInsts; } - bool hasFP32Denormals() const { + bool hasFP32Denormals(const Function &F) const { + // FIXME: This should not be a property of the subtarget. This should be a + // property with a default set by the calling convention which can be + // overridden by attributes. For now, use the subtarget feature as a + // placeholder attribute. The function argument's only purpose is to + // discourage use without a function context until this is removed. 
return FP32Denormals; } @@ -612,11 +617,17 @@ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const; - bool hasFP16Denormals() const { + /// Alias for hasFP64FP16Denormals + bool hasFP16Denormals(const Function &F) const { return FP64FP16Denormals; } - bool hasFP64Denormals() const { + /// Alias for hasFP64FP16Denormals + bool hasFP64Denormals(const Function &F) const { + return FP64FP16Denormals; + } + + bool hasFP64FP16Denormals(const Function &F) const { return FP64FP16Denormals; } Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -46,7 +46,7 @@ Triple TargetTriple; - const TargetSubtargetInfo *ST; + const GCNSubtarget *ST; const TargetLoweringBase *TLI; const TargetSubtargetInfo *getST() const { return ST; } @@ -73,6 +73,7 @@ const AMDGPUTargetLowering *TLI; AMDGPUTTIImpl CommonTTI; bool IsGraphicsShader; + bool HasFP32Denormals; const FeatureBitset InlineFeatureIgnoreList = { // Codegen control options which don't matter. @@ -131,7 +132,8 @@ ST(static_cast(TM->getSubtargetImpl(F))), TLI(ST->getTargetLowering()), CommonTTI(TM, F), - IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {} + IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())), + HasFP32Denormals(ST->hasFP32Denormals(F)) { } bool hasBranchDivergence() { return true; } Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -412,7 +412,7 @@ if (!Args.empty() && match(Args[0], PatternMatch::m_FPOne())) { // TODO: This is more complicated, unsafe flags etc. 
- if ((SLT == MVT::f32 && !ST->hasFP32Denormals()) || + if ((SLT == MVT::f32 && !HasFP32Denormals) || (SLT == MVT::f16 && ST->has16BitInsts())) { return LT.first * getQuarterRateInstrCost() * NElts; } @@ -431,7 +431,7 @@ if (SLT == MVT::f32 || SLT == MVT::f16) { int Cost = 7 * getFullRateInstrCost() + 1 * getQuarterRateInstrCost(); - if (!ST->hasFP32Denormals()) { + if (!HasFP32Denormals) { // FP mode switches. Cost += 2 * getFullRateInstrCost(); } @@ -671,10 +671,13 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { const TargetMachine &TM = getTLI()->getTargetMachine(); - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); + const GCNSubtarget *CallerST + = static_cast(TM.getSubtargetImpl(*Caller)); + const GCNSubtarget *CalleeST + = static_cast(TM.getSubtargetImpl(*Callee)); + + const FeatureBitset &CallerBits = CallerST->getFeatureBits(); + const FeatureBitset &CalleeBits = CalleeST->getFeatureBits(); FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; @@ -683,8 +686,8 @@ // FIXME: dx10_clamp can just take the caller setting, but there seems to be // no way to support merge for backend defined attributes. 
- AMDGPU::SIModeRegisterDefaults CallerMode(*Caller); - AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee); + AMDGPU::SIModeRegisterDefaults CallerMode(*Caller, *CallerST); + AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee, *CalleeST); return CallerMode.isInlineCompatible(CalleeMode); } Index: llvm/lib/Target/AMDGPU/R600ISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -223,10 +223,8 @@ setOperationAction(ISD::FMA, MVT::f64, Expand); } - // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we - // need it for R600. - if (!Subtarget->hasFP32Denormals()) - setOperationAction(ISD::FMAD, MVT::f32, Legal); + // FIXME: May need no denormals check + setOperationAction(ISD::FMAD, MVT::f32, Legal); if (!Subtarget->hasBFI()) { // fcopysign can be done in a single instruction with BFI. Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1358,8 +1358,8 @@ case AMDGPU::V_MUL_F32_e64: case AMDGPU::V_MUL_F16_e64: { // If output denormals are enabled, omod is ignored. - if ((Op == AMDGPU::V_MUL_F32_e64 && ST->hasFP32Denormals()) || - (Op == AMDGPU::V_MUL_F16_e64 && ST->hasFP16Denormals())) + if ((Op == AMDGPU::V_MUL_F32_e64 && MFI->getMode().FP32Denormals) || + (Op == AMDGPU::V_MUL_F16_e64 && MFI->getMode().FP64FP16Denormals)) return std::make_pair(nullptr, SIOutMods::NONE); const MachineOperand *RegOp = nullptr; @@ -1388,8 +1388,8 @@ case AMDGPU::V_ADD_F32_e64: case AMDGPU::V_ADD_F16_e64: { // If output denormals are enabled, omod is ignored. 
- if ((Op == AMDGPU::V_ADD_F32_e64 && ST->hasFP32Denormals()) || - (Op == AMDGPU::V_ADD_F16_e64 && ST->hasFP16Denormals())) + if ((Op == AMDGPU::V_ADD_F32_e64 && MFI->getMode().FP32Denormals) || + (Op == AMDGPU::V_ADD_F16_e64 && MFI->getMode().FP64FP16Denormals)) return std::make_pair(nullptr, SIOutMods::NONE); // Look through the DAGCombiner canonicalization fmul x, 2 -> fadd x, x Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -393,7 +393,7 @@ bool isCanonicalized(SelectionDAG &DAG, SDValue Op, unsigned MaxDepth = 5) const; - bool denormalsEnabledForType(EVT VT) const; + bool denormalsEnabledForType(const SelectionDAG &DAG, EVT VT) const; bool isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -100,6 +100,16 @@ cl::desc("Do not align and prefetch loops"), cl::init(false)); +static bool hasFP32Denormals(const MachineFunction &MF) { + const SIMachineFunctionInfo *Info = MF.getInfo(); + return Info->getMode().FP32Denormals; +} + +static bool hasFP64FP16Denormals(const MachineFunction &MF) { + const SIMachineFunctionInfo *Info = MF.getInfo(); + return Info->getMode().FP64FP16Denormals; +} + static unsigned findFirstFreeSGPR(CCState &CCInfo) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) { @@ -363,9 +373,10 @@ setOperationAction(ISD::FLOG10, MVT::f16, Custom); } - // v_mad_f32 does not support denormals according to some sources. - if (!Subtarget->hasFP32Denormals()) - setOperationAction(ISD::FMAD, MVT::f32, Legal); + // v_mad_f32 does not support denormals. 
We report it as unconditionally + // legal, and the context where it is formed will disallow it when fp32 + // denormals are enabled. + setOperationAction(ISD::FMAD, MVT::f32, Legal); if (!Subtarget->hasBFI()) { // fcopysign can be done in a single instruction with BFI. @@ -503,7 +514,7 @@ // F16 - VOP3 Actions. setOperationAction(ISD::FMA, MVT::f16, Legal); - if (!Subtarget->hasFP16Denormals() && STI.hasMadF16()) + if (STI.hasMadF16()) setOperationAction(ISD::FMAD, MVT::f16, Legal); for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) { @@ -765,8 +776,9 @@ EVT DestVT, EVT SrcVT) const { return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) || (Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) && - DestVT.getScalarType() == MVT::f32 && !Subtarget->hasFP32Denormals() && - SrcVT.getScalarType() == MVT::f16; + DestVT.getScalarType() == MVT::f32 && + SrcVT.getScalarType() == MVT::f16 && + !hasFP32Denormals(DAG.getMachineFunction()); } bool SITargetLowering::isShuffleMaskLegal(ArrayRef, EVT) const { @@ -3923,7 +3935,7 @@ // mad available which returns the same result as the separate operations // which we should prefer over fma. We can't use this if we want to support // denormals, so only report this in these cases. - if (Subtarget->hasFP32Denormals()) + if (hasFP32Denormals(MF)) return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts(); // If the subtarget has v_fmac_f32, that's just as good as v_mac_f32. @@ -3932,7 +3944,7 @@ case MVT::f64: return true; case MVT::f16: - return Subtarget->has16BitInsts() && Subtarget->hasFP16Denormals(); + return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF); default: break; } @@ -3946,9 +3958,11 @@ // v_mad_f32/v_mac_f32 do not support denormals. 
EVT VT = N->getValueType(0); if (VT == MVT::f32) - return !Subtarget->hasFP32Denormals(); - if (VT == MVT::f16) - return !Subtarget->hasFP16Denormals() && Subtarget->hasMadF16(); + return !hasFP32Denormals(DAG.getMachineFunction()); + if (VT == MVT::f16) { + return Subtarget->hasMadF16() && + !hasFP64FP16Denormals(DAG.getMachineFunction()); + } return false; } @@ -7536,7 +7550,7 @@ const SDNodeFlags Flags = Op->getFlags(); bool Unsafe = DAG.getTarget().Options.UnsafeFPMath || Flags.hasAllowReciprocal(); - if (!Unsafe && VT == MVT::f32 && Subtarget->hasFP32Denormals()) + if (!Unsafe && VT == MVT::f32 && hasFP32Denormals(DAG.getMachineFunction())) return SDValue(); if (const ConstantFPSDNode *CLHS = dyn_cast(LHS)) { @@ -7679,7 +7693,7 @@ static const SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG, const SDLoc &SL, const GCNSubtarget *ST) { assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE"); - int DPDenormModeDefault = ST->hasFP64Denormals() + int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction()) ? 
FP_DENORM_FLUSH_NONE : FP_DENORM_FLUSH_IN_FLUSH_OUT; @@ -7715,7 +7729,9 @@ (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_); const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16); - if (!Subtarget->hasFP32Denormals()) { + const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction()); + + if (!HasFP32Denormals) { SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); SDValue EnableDenorm; @@ -7759,8 +7775,7 @@ SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled, Fma3); - if (!Subtarget->hasFP32Denormals()) { - + if (!HasFP32Denormals) { SDValue DisableDenorm; if (Subtarget->hasDenormModeInst()) { const SDValue DisableDenormValue = @@ -8734,7 +8749,7 @@ auto F = CFP->getValueAPF(); if (F.isNaN() && F.isSignaling()) return false; - return !F.isDenormal() || denormalsEnabledForType(Op.getValueType()); + return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType()); } // If source is a result of another standard FP operation it is already in @@ -8803,7 +8818,7 @@ // snans will be quieted, so we only need to worry about denormals. if (Subtarget->supportsMinMaxDenormModes() || - denormalsEnabledForType(Op.getValueType())) + denormalsEnabledForType(DAG, Op.getValueType())) return true; // Flushing may be required. @@ -8875,7 +8890,7 @@ LLVM_FALLTHROUGH; } default: - return denormalsEnabledForType(Op.getValueType()) && + return denormalsEnabledForType(DAG, Op.getValueType()) && DAG.isKnownNeverSNaN(Op); } @@ -8886,7 +8901,7 @@ SDValue SITargetLowering::getCanonicalConstantFP( SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const { // Flush denormals to 0 if not enabled. - if (C.isDenormal() && !denormalsEnabledForType(VT)) + if (C.isDenormal() && !denormalsEnabledForType(DAG, VT)) return DAG.getConstantFP(0.0, SL, VT); if (C.isNaN()) { @@ -9424,8 +9439,8 @@ // Only do this if we are not trying to support denormals. v_mad_f32 does not // support denormals ever. 
- if (((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) || - (VT == MVT::f16 && !Subtarget->hasFP16Denormals() && + if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) || + (VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) && getSubtarget()->hasMadF16())) && isOperationLegal(ISD::FMAD, VT)) return ISD::FMAD; @@ -10891,14 +10906,14 @@ return false; } -bool SITargetLowering::denormalsEnabledForType(EVT VT) const { +bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG, + EVT VT) const { switch (VT.getScalarType().getSimpleVT().SimpleTy) { case MVT::f32: - return Subtarget->hasFP32Denormals(); + return hasFP32Denormals(DAG.getMachineFunction()); case MVT::f64: - return Subtarget->hasFP64Denormals(); case MVT::f16: - return Subtarget->hasFP16Denormals(); + return hasFP64FP16Denormals(DAG.getMachineFunction()); default: return false; } Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -340,9 +340,6 @@ AMDGPUFunctionArgInfo ArgInfo; - // State of MODE register, assumed FP mode. - AMDGPU::SIModeRegisterDefaults Mode; - // Graphics info. 
unsigned PSInputAddr = 0; unsigned PSInputEnable = 0; @@ -515,10 +512,6 @@ : I->second.Lanes[Lane]; } - AMDGPU::SIModeRegisterDefaults getMode() const { - return Mode; - } - bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const; bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); Index: llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -28,7 +28,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), - Mode(MF.getFunction()), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -676,7 +676,8 @@ FP32Denormals(true), FP64FP16Denormals(true) {} - SIModeRegisterDefaults(const Function &F); + // FIXME: Should not depend on the subtarget + SIModeRegisterDefaults(const Function &F, const GCNSubtarget &ST); static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) { const bool IsCompute = AMDGPU::isCompute(CC); @@ -695,10 +696,23 @@ FP32Denormals == Other.FP32Denormals && FP64FP16Denormals == Other.FP64FP16Denormals; } + /// Returns true if a flag is compatible if it's enabled in the caller, but + /// disabled in the callee. + static bool oneWayCompatible(bool CallerMode, bool CalleeMode) { + return CallerMode == CalleeMode || (CallerMode && !CalleeMode); + } + // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should // be able to override. 
bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const { - return *this == CalleeMode; + if (DX10Clamp != CalleeMode.DX10Clamp) + return false; + if (IEEE != CalleeMode.IEEE) + return false; + + // Allow inlining denormals flushed functions into denormals enabled functions. + return oneWayCompatible(FP64FP16Denormals, CalleeMode.FP64FP16Denormals) && + oneWayCompatible(FP32Denormals, CalleeMode.FP32Denormals); } }; Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1302,7 +1302,8 @@ return true; } -SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) { +SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F, + const GCNSubtarget &ST) { *this = getDefaultForCallingConv(F.getCallingConv()); StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString(); @@ -1313,6 +1314,9 @@ = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString(); if (!DX10ClampAttr.empty()) DX10Clamp = DX10ClampAttr == "true"; + + FP32Denormals = ST.hasFP32Denormals(F); + FP64FP16Denormals = ST.hasFP64FP16Denormals(F); } namespace { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fcanonicalize.mir @@ -37,8 +37,8 @@ liveins: $vgpr0 ; GFX9-LABEL: name: fcanonicalize_f16_flush ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MAX_F16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F16_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F16_e64_]] + ; GFX9: [[V_MUL_F16_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F16_e64 0, 15360, 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MUL_F16_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s16) = G_TRUNC 
%0 %2:vgpr(s16) = G_FCANONICALIZE %1 @@ -60,8 +60,8 @@ ; GFX9-LABEL: name: fcanonicalize_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 0, [[COPY]], 0, 0, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -170,8 +170,8 @@ ; GFX9-LABEL: name: fcanonicalize_f64_flush ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 - ; GFX9: [[V_MAX_F64_:%[0-9]+]]:vreg_64 = V_MAX_F64 0, [[COPY]], 0, [[COPY]], 0, 0, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F64_]] + ; GFX9: [[V_MUL_F64_:%[0-9]+]]:vreg_64 = V_MUL_F64 0, 4607182418800017408, 0, [[COPY]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MUL_F64_]] %0:vgpr(s64) = COPY $vgpr0_vgpr1 %1:vgpr(s64) = G_FCANONICALIZE %0 S_ENDPGM 0, implicit %1 @@ -191,8 +191,8 @@ liveins: $vgpr0 ; GFX9-LABEL: name: fcanonicalize_fabs_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[COPY]], 0, 0, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[COPY]], 2, [[COPY]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FABS %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -237,8 +237,8 @@ liveins: $vgpr0 ; GFX9-LABEL: name: fcanonicalize_fneg_f32_denorm ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 3212836864, 0, [[COPY]], 0, 0, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 1, [[COPY]], 1, [[COPY]], 0, 0, implicit $exec 
+ ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FCANONICALIZE %1 @@ -283,8 +283,8 @@ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648 ; GFX9: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec - ; GFX9: [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, 1065353216, 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec - ; GFX9: S_ENDPGM 0, implicit [[V_MUL_F32_e64_]] + ; GFX9: [[V_MAX_F32_e64_:%[0-9]+]]:vgpr_32 = V_MAX_F32_e64 2, [[V_XOR_B32_e32_]], 2, [[V_XOR_B32_e32_]], 0, 0, implicit $exec + ; GFX9: S_ENDPGM 0, implicit [[V_MAX_F32_e64_]] %0:vgpr(s32) = COPY $vgpr0 %1:vgpr(s32) = G_FNEG %0 %2:vgpr(s32) = G_FABS %1 Index: llvm/utils/UpdateTestChecks/asm.py =================================================================== --- llvm/utils/UpdateTestChecks/asm.py +++ llvm/utils/UpdateTestChecks/asm.py @@ -355,11 +355,11 @@ scrubber, function_re = handler common.build_function_body_dictionary( function_re, scrubber, [args], raw_tool_output, prefixes, - func_dict, args.verbose, False) + func_dict, args.verbose) ##### Generator of assembly CHECK lines def add_asm_checks(output_lines, comment_marker, prefix_list, func_dict, func_name): # Label format is based on ASM string. 
- check_label_format = '{} %s-LABEL: %s%s:'.format(comment_marker) + check_label_format = '{} %s-LABEL: %s:'.format(comment_marker) common.add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, True, False) Index: llvm/utils/UpdateTestChecks/common.py =================================================================== --- llvm/utils/UpdateTestChecks/common.py +++ llvm/utils/UpdateTestChecks/common.py @@ -48,11 +48,11 @@ RUN_LINE_RE = re.compile(r'^\s*[;#]\s*RUN:\s*(.*)$') CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)') PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$') -CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME)?:') +CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL)?:') OPT_FUNCTION_RE = re.compile( - r'^\s*define\s+(?:internal\s+)?[^@]*@(?P[\w-]+?)\s*' - r'(?P\((\)|(.*?[\w\.\-]+?)\))[^{]*)\{\n(?P.*?)^\}$', + r'^\s*define\s+(?:internal\s+)?[^@]*@(?P[\w-]+?)\s*\(' + r'(\s+)?[^)]*[^{]*\{\n(?P.*?)^\}$', flags=(re.M | re.S)) ANALYZE_FUNCTION_RE = re.compile( @@ -102,45 +102,18 @@ # Build up a dictionary of all the function bodies. 
class function_body(object): - def __init__(self, string, extra, args_and_sig): + def __init__(self, string, extra): self.scrub = string self.extrascrub = extra - self.args_and_sig = args_and_sig - def is_same_except_arg_names(self, extrascrub, args_and_sig): - arg_names = set() - def drop_arg_names(match): - arg_names.add(match.group(2)) - return match.group(1) + match.group(3) - def repl_arg_names(match): - if match.group(2) in arg_names: - return match.group(1) + match.group(3) - return match.group(1) + match.group(2) + match.group(3) - ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig) - ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig) - if ans0 != ans1: - return False - es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub) - es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub) - es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0) - es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1) - return es0 == es1 - def __str__(self): return self.scrub -def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args): +def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose): for m in function_re.finditer(raw_tool_output): if not m: continue func = m.group('func') body = m.group('body') - # Determine if we print arguments, the opening brace, or nothing after the function name - if record_args and 'args_and_sig' in m.groupdict(): - args_and_sig = scrub_body(m.group('args_and_sig').strip()) - elif 'args_and_sig' in m.groupdict(): - args_and_sig = '(' - else: - args_and_sig = '' scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False) scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True) if 'analysis' in m.groupdict(): @@ -155,10 +128,9 @@ for l in scrubbed_body.splitlines(): print(' ' + l, file=sys.stderr) for prefix in prefixes: - if func in func_dict[prefix] and (str(func_dict[prefix][func]) != scrubbed_body or 
(func_dict[prefix][func] and func_dict[prefix][func].args_and_sig != args_and_sig)): - if func_dict[prefix][func] and func_dict[prefix][func].is_same_except_arg_names(scrubbed_extra, args_and_sig): + if func in func_dict[prefix] and str(func_dict[prefix][func]) != scrubbed_body: + if func_dict[prefix][func] and func_dict[prefix][func].extrascrub == scrubbed_extra: func_dict[prefix][func].scrub = scrubbed_extra - func_dict[prefix][func].args_and_sig = args_and_sig continue else: if prefix == prefixes[-1]: @@ -167,7 +139,7 @@ func_dict[prefix][func] = None continue - func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig) + func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra) ##### Generator of LLVM IR CHECK lines @@ -247,13 +219,7 @@ output_lines.append(comment_marker) printed_prefixes.append(checkprefix) - args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig) - args_and_sig = genericize_check_lines([args_and_sig], is_analyze)[0] - if '[[' in args_and_sig: - output_lines.append(check_label_format % (checkprefix, func_name, '')) - output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig)) - else: - output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig)) + output_lines.append(check_label_format % (checkprefix, func_name)) func_body = str(func_dict[checkprefix][func_name]).splitlines() # For ASM output, just emit the check lines. @@ -307,13 +273,12 @@ def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, preserve_names): # Label format is based on IR string. 
- function_def_regex = 'define {{[^@]+}}' - check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex) + check_label_format = '{} %s-LABEL: @%s('.format(comment_marker) add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, preserve_names) def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name): - check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker) + check_label_format = '{} %s-LABEL: \'%s\''.format(comment_marker) add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True) Index: llvm/utils/gn/build/toolchain/BUILD.gn =================================================================== --- llvm/utils/gn/build/toolchain/BUILD.gn +++ llvm/utils/gn/build/toolchain/BUILD.gn @@ -207,15 +207,6 @@ use_lld = true } } - - stage2_unix_toolchain("stage2_android_arm") { - toolchain_args = { - current_os = "android" - current_cpu = "arm" - is_clang = true - use_lld = true - } - } } toolchain("win") { Index: llvm/utils/gn/build/toolchain/target_flags.gni =================================================================== --- llvm/utils/gn/build/toolchain/target_flags.gni +++ llvm/utils/gn/build/toolchain/target_flags.gni @@ -11,9 +11,6 @@ "-B$android_ndk_path/toolchains/llvm/prebuilt/linux-x86_64", ] target_ldflags += [ "-static-libstdc++" ] - if (current_cpu == "arm") { - target_flags += [ "-march=armv7-a" ] - } } if (current_cpu == "x86") { Index: llvm/utils/gn/secondary/compiler-rt/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/compiler-rt/BUILD.gn +++ llvm/utils/gn/secondary/compiler-rt/BUILD.gn @@ -1,19 +1,8 @@ -import("//llvm/utils/gn/build/toolchain/compiler.gni") - -# In the GN build, compiler-rt is always built by just-built clang and lld. 
-# FIXME: For macOS and iOS builds, depend on lib in all needed target arch -# toolchains and then lipo them together for the final output. -supported_toolchains = [ "//llvm/utils/gn/build/toolchain:stage2_unix" ] -if (android_ndk_path != "") { - supported_toolchains += [ - "//llvm/utils/gn/build/toolchain:stage2_android_aarch64", - "//llvm/utils/gn/build/toolchain:stage2_android_arm", - ] -} - group("compiler-rt") { - deps = [] - foreach(toolchain, supported_toolchains) { - deps += [ "//compiler-rt/lib($toolchain)" ] - } + # In the GN build, compiler-rt is always built by just-built clang and lld. + # FIXME: For macOS and iOS builds, depend on lib in all needed target arch + # toolchains and then lipo them together for the final output. + deps = [ + "//compiler-rt/lib(//llvm/utils/gn/build/toolchain:stage2_unix)", + ] } Index: llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn +++ llvm/utils/gn/secondary/compiler-rt/lib/builtins/BUILD.gn @@ -24,7 +24,7 @@ "-fPIC", "-fno-builtin", ] - if (current_os != "android") { + if (target_os != "android") { cflags += [ "-fvisibility=hidden" ] } if (!is_debug) { @@ -164,7 +164,7 @@ "gcc_personality_v0.c", ] - if (current_os != "fuchsia") { + if (target_os != "fuchsia") { sources += [ "emutls.c", "enable_execute_stack.c", @@ -172,7 +172,7 @@ ] } - if (current_os != "fuchsia" && !compiler_rt_baremetal_build) { + if (target_os != "fuchsia" && !compiler_rt_baremetal_build) { sources += [ # This comment prevents `gn format` from putting the file on the same line # as `sources +=`, for sync_source_lists_from_cmake.py. 
@@ -180,7 +180,7 @@ ] } - if (current_os == "mac") { + if (target_os == "mac") { sources += [ "atomic_flag_clear.c", "atomic_flag_clear_explicit.c", @@ -191,7 +191,7 @@ ] } - if ((current_cpu == "x64" && current_os != "win") || current_cpu == "arm64") { + if ((target_cpu == "x64" && target_os != "win") || target_cpu == "arm64") { # GENERIC_TF_SOURCES sources += [ "comparetf2.c", @@ -215,7 +215,7 @@ ] } - if (current_cpu == "x86" || current_cpu == "x64") { + if (target_cpu == "x86" || target_cpu == "x64") { sources += [ "cpu_model.c", "divxc3.c", @@ -232,7 +232,7 @@ "powixf2.c", ] } - if (current_cpu == "x86") { + if (target_cpu == "x86") { sources -= [ "ashldi3.c", "ashrdi3.c", @@ -265,13 +265,13 @@ "i386/udivdi3.S", "i386/umoddi3.S", ] - if (current_os == "win") { + if (target_os == "win") { sources += [ "i386/chkstk.S", "i386/chkstk2.S", ] } - } else if (current_cpu == "x64") { + } else if (target_cpu == "x64") { sources -= [ "floatdidf.c", "floatdisf.c", @@ -288,7 +288,7 @@ "x86_64/floatundisf.S", "x86_64/floatundixf.S", ] - if (current_os == "win") { + if (target_os == "win") { sources += [ "x86_64/chkstk.S", "x86_64/chkstk2.S", @@ -296,8 +296,8 @@ } } - if (current_cpu == "arm") { - if (current_os != "mingw") { + if (target_cpu == "arm") { + if (target_os != "mingw") { sources -= [ "bswapdi2.c", "bswapsi2.c", @@ -306,30 +306,12 @@ "comparesf2.c", "divmodsi4.c", "divsi3.c", - "fp_mode.c", "modsi3.c", "udivmodsi4.c", "udivsi3.c", "umodsi3.c", ] sources += [ - "arm/aeabi_cdcmp.S", - "arm/aeabi_cdcmpeq_check_nan.c", - "arm/aeabi_cfcmp.S", - "arm/aeabi_cfcmpeq_check_nan.c", - "arm/aeabi_dcmp.S", - "arm/aeabi_div0.c", - "arm/aeabi_drsub.c", - "arm/aeabi_fcmp.S", - "arm/aeabi_frsub.c", - "arm/aeabi_idivmod.S", - "arm/aeabi_ldivmod.S", - "arm/aeabi_memcmp.S", - "arm/aeabi_memcpy.S", - "arm/aeabi_memmove.S", - "arm/aeabi_memset.S", - "arm/aeabi_uidivmod.S", - "arm/aeabi_uldivmod.S", "arm/bswapdi2.S", "arm/bswapsi2.S", "arm/clzdi2.S", @@ -339,10 +321,6 @@ 
"arm/divsi3.S", "arm/fp_mode.c", "arm/modsi3.S", - "arm/switch16.S", - "arm/switch32.S", - "arm/switch8.S", - "arm/switchu8.S", "arm/sync_fetch_and_add_4.S", "arm/sync_fetch_and_add_8.S", "arm/sync_fetch_and_and_4.S", @@ -363,52 +341,10 @@ "arm/sync_fetch_and_umin_8.S", "arm/sync_fetch_and_xor_4.S", "arm/sync_fetch_and_xor_8.S", - "arm/sync_synchronize.S", "arm/udivmodsi4.S", "arm/udivsi3.S", "arm/umodsi3.S", ] - - if (current_os == "android") { - sources += [ - "arm/adddf3vfp.S", - "arm/addsf3vfp.S", - "arm/divdf3vfp.S", - "arm/divsf3vfp.S", - "arm/eqdf2vfp.S", - "arm/eqsf2vfp.S", - "arm/extendsfdf2vfp.S", - "arm/fixdfsivfp.S", - "arm/fixsfsivfp.S", - "arm/fixunsdfsivfp.S", - "arm/fixunssfsivfp.S", - "arm/floatsidfvfp.S", - "arm/floatsisfvfp.S", - "arm/floatunssidfvfp.S", - "arm/floatunssisfvfp.S", - "arm/gedf2vfp.S", - "arm/gesf2vfp.S", - "arm/gtdf2vfp.S", - "arm/gtsf2vfp.S", - "arm/ledf2vfp.S", - "arm/lesf2vfp.S", - "arm/ltdf2vfp.S", - "arm/ltsf2vfp.S", - "arm/muldf3vfp.S", - "arm/mulsf3vfp.S", - "arm/nedf2vfp.S", - "arm/negdf2vfp.S", - "arm/negsf2vfp.S", - "arm/nesf2vfp.S", - "arm/restore_vfp_d8_d15_regs.S", - "arm/save_vfp_d8_d15_regs.S", - "arm/subdf3vfp.S", - "arm/subsf3vfp.S", - "arm/truncdfsf2vfp.S", - "arm/unorddf2vfp.S", - "arm/unordsf2vfp.S", - ] - } } else { sources += [ "arm/aeabi_idivmod.S", @@ -421,13 +357,13 @@ } } - if (current_cpu == "arm64") { + if (target_cpu == "arm64") { sources += [ # This comment prevents `gn format` from putting the file on the same line # as `sources +=`, for sync_source_lists_from_cmake.py. "aarch64/fp_mode.c", ] - if (current_os == "mingw") { + if (target_os == "mingw") { sources += [ # This comment prevents `gn format` from putting the file on the same # line as `sources +=`, for sync_source_lists_from_cmake.py. 
@@ -436,7 +372,7 @@ } } - if (current_cpu == "hexagon") { + if (target_cpu == "hexagon") { sources += [ "hexagon/common_entry_exit_abi1.S", "hexagon/common_entry_exit_abi2.S", @@ -471,7 +407,7 @@ ] } - if (current_cpu == "ppc64") { + if (target_cpu == "ppc64") { sources += [ "ppc/divtc3.c", "ppc/fixtfdi.c", @@ -488,7 +424,7 @@ ] } - if (current_cpu == "riscv") { + if (target_cpu == "riscv") { sources += [ # This comment prevents `gn format` from putting the file on the same line # as `sources +=`, for sync_source_lists_from_cmake.py. @@ -514,5 +450,73 @@ "arm/comparesf2.S", "arm/divsi3.S", "arm/udivsi3.S", + + # EABI + "arm/aeabi_cdcmp.S", + "arm/aeabi_cdcmpeq_check_nan.c", + "arm/aeabi_cfcmp.S", + "arm/aeabi_cfcmpeq_check_nan.c", + "arm/aeabi_dcmp.S", + "arm/aeabi_div0.c", + "arm/aeabi_drsub.c", + "arm/aeabi_fcmp.S", + "arm/aeabi_frsub.c", + "arm/aeabi_idivmod.S", + "arm/aeabi_ldivmod.S", + "arm/aeabi_memcmp.S", + "arm/aeabi_memcpy.S", + "arm/aeabi_memmove.S", + "arm/aeabi_memset.S", + "arm/aeabi_uidivmod.S", + "arm/aeabi_uldivmod.S", + + # Thumb1 JT + "arm/switch16.S", + "arm/switch32.S", + "arm/switch8.S", + "arm/switchu8.S", + + # Thumb1 SjLj + "arm/restore_vfp_d8_d15_regs.S", + "arm/save_vfp_d8_d15_regs.S", + + # Thumb1 VFPv2 + "arm/adddf3vfp.S", + "arm/addsf3vfp.S", + "arm/divdf3vfp.S", + "arm/divsf3vfp.S", + "arm/eqdf2vfp.S", + "arm/eqsf2vfp.S", + "arm/extendsfdf2vfp.S", + "arm/fixdfsivfp.S", + "arm/fixsfsivfp.S", + "arm/fixunsdfsivfp.S", + "arm/fixunssfsivfp.S", + "arm/floatsidfvfp.S", + "arm/floatsisfvfp.S", + "arm/floatunssidfvfp.S", + "arm/floatunssisfvfp.S", + "arm/gedf2vfp.S", + "arm/gesf2vfp.S", + "arm/gtdf2vfp.S", + "arm/gtsf2vfp.S", + "arm/ledf2vfp.S", + "arm/lesf2vfp.S", + "arm/ltdf2vfp.S", + "arm/ltsf2vfp.S", + "arm/muldf3vfp.S", + "arm/mulsf3vfp.S", + "arm/nedf2vfp.S", + "arm/negdf2vfp.S", + "arm/negsf2vfp.S", + "arm/nesf2vfp.S", + "arm/subdf3vfp.S", + "arm/subsf3vfp.S", + "arm/truncdfsf2vfp.S", + "arm/unorddf2vfp.S", + "arm/unordsf2vfp.S", + + # 
Thumb1 icache + "arm/sync_synchronize.S", ] } Index: llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn +++ llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn @@ -12,7 +12,7 @@ configs -= [ "//llvm/utils/gn/build:thin_archive" ] cflags = [] - if (current_os != "win") { + if (target_os != "win") { cflags = [ "-fPIC", "-Wno-pedantic", @@ -47,14 +47,14 @@ "InstrProfilingValue.c", "InstrProfilingWriter.c", ] - if (current_os == "win") { + if (target_os == "win") { sources += [ "WindowsMMap.c", "WindowsMMap.h", ] } - if (current_os != "win") { + if (target_os != "win") { defines = [ "COMPILER_RT_TARGET_HAS_ATOMICS", "COMPILER_RT_TARGET_HAS_FCNTL_LCK", Index: llvm/utils/gn/secondary/compiler-rt/target.gni =================================================================== --- llvm/utils/gn/secondary/compiler-rt/target.gni +++ llvm/utils/gn/secondary/compiler-rt/target.gni @@ -5,8 +5,6 @@ crt_current_target_arch = "i386" } else if (current_cpu == "x64") { crt_current_target_arch = "x86_64" -} else if (current_cpu == "arm") { - crt_current_target_arch = "arm" } else if (current_cpu == "arm64") { crt_current_target_arch = "aarch64" } else if (current_cpu == "ppc64") { Index: llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn +++ llvm/utils/gn/secondary/llvm/lib/Remarks/BUILD.gn @@ -10,7 +10,6 @@ "BitstreamRemarkSerializer.cpp", "Remark.cpp", "RemarkFormat.cpp", - "RemarkLinker.cpp", "RemarkParser.cpp", "RemarkSerializer.cpp", "RemarkStringTable.cpp", Index: llvm/utils/gn/secondary/llvm/triples.gni =================================================================== --- llvm/utils/gn/secondary/llvm/triples.gni +++ llvm/utils/gn/secondary/llvm/triples.gni @@ -12,10 +12,6 @@ } else if (current_os == "win") { 
llvm_current_triple = "x86_64-pc-windows-msvc" } -} else if (current_cpu == "arm") { - if (current_os == "android") { - llvm_current_triple = "arm-linux-androideabi" - } } else if (current_cpu == "arm64") { if (current_os == "android") { llvm_current_triple = "aarch64-linux-android21" Index: llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn +++ llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn @@ -4,8 +4,12 @@ deps = [ "//llvm/lib/ExecutionEngine/JITLink", "//llvm/lib/ExecutionEngine/RuntimeDyld", + "//llvm/lib/MC", + "//llvm/lib/MC/MCDisassembler", + "//llvm/lib/MC/MCParser", "//llvm/lib/Object", "//llvm/lib/Support", + "//llvm/lib/Target", "//llvm/lib/Target:TargetsToBuild", "//llvm/lib/Testing/Support", ] Index: llvm/utils/gn/secondary/llvm/unittests/Remarks/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/unittests/Remarks/BUILD.gn +++ llvm/utils/gn/secondary/llvm/unittests/Remarks/BUILD.gn @@ -11,7 +11,6 @@ "BitstreamRemarksParsingTest.cpp", "BitstreamRemarksSerializerTest.cpp", "RemarksAPITest.cpp", - "RemarksLinkingTest.cpp", "RemarksStrTabParsingTest.cpp", "YAMLRemarksParsingTest.cpp", "YAMLRemarksSerializerTest.cpp", Index: llvm/utils/gn/secondary/llvm/unittests/Transforms/Scalar/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/unittests/Transforms/Scalar/BUILD.gn +++ llvm/utils/gn/secondary/llvm/unittests/Transforms/Scalar/BUILD.gn @@ -5,14 +5,11 @@ "//llvm/lib/Analysis", "//llvm/lib/AsmParser", "//llvm/lib/IR", - "//llvm/lib/Passes", "//llvm/lib/Support", - "//llvm/lib/Testing/Support", "//llvm/lib/Transforms/Scalar", "//llvm/lib/Transforms/Utils", ] sources = [ - "LICMTest.cpp", "LoopPassManagerTest.cpp", ] } Index: llvm/utils/lit/lit/ProgressBar.py 
=================================================================== --- llvm/utils/lit/lit/ProgressBar.py +++ llvm/utils/lit/lit/ProgressBar.py @@ -241,7 +241,7 @@ self.useETA = useETA if self.useETA: self.startTime = time.time() - # self.update(0, '') + self.update(0, '') def update(self, percent, message): if self.cleared: Index: llvm/utils/lit/lit/TestRunner.py =================================================================== --- llvm/utils/lit/lit/TestRunner.py +++ llvm/utils/lit/lit/TestRunner.py @@ -260,27 +260,6 @@ env.env[key] = val return args[arg_idx_next:] -def executeBuiltinCd(cmd, shenv): - """executeBuiltinCd - Change the current directory.""" - if len(cmd.args) != 2: - raise InternalShellError("'cd' supports only one argument") - newdir = cmd.args[1] - # Update the cwd in the parent environment. - if os.path.isabs(newdir): - shenv.cwd = newdir - else: - shenv.cwd = os.path.realpath(os.path.join(shenv.cwd, newdir)) - # The cd builtin always succeeds. If the directory does not exist, the - # following Popen calls will fail instead. 
- return ShellCommandResult(cmd, "", "", 0, False) - -def executeBuiltinExport(cmd, shenv): - """executeBuiltinExport - Set an environment variable.""" - if len(cmd.args) != 2: - raise InternalShellError("'export' supports only one argument") - updateEnv(shenv, cmd.args) - return ShellCommandResult(cmd, "", "", 0, False) - def executeBuiltinEcho(cmd, shenv): """Interpret a redirected echo command""" opened_files = [] @@ -340,8 +319,9 @@ for (name, mode, f, path) in opened_files: f.close() - output = "" if is_redirected else stdout.getvalue() - return ShellCommandResult(cmd, output, "", 0, False) + if not is_redirected: + return stdout.getvalue() + return "" def executeBuiltinMkdir(cmd, cmd_shenv): """executeBuiltinMkdir - Create new directories.""" @@ -476,10 +456,6 @@ exitCode = 1 return ShellCommandResult(cmd, "", stderr.getvalue(), exitCode, False) -def executeBuiltinColon(cmd, cmd_shenv): - """executeBuiltinColon - Discard arguments and exit with status 0.""" - return ShellCommandResult(cmd, "", "", 0, False) - def processRedirects(cmd, stdin_source, cmd_shenv, opened_files): """Return the standard fds for cmd after applying redirects @@ -605,6 +581,64 @@ raise ValueError('Unknown shell command: %r' % cmd.op) assert isinstance(cmd, ShUtil.Pipeline) + # Handle shell builtins first. + if cmd.commands[0].args[0] == 'cd': + if len(cmd.commands) != 1: + raise ValueError("'cd' cannot be part of a pipeline") + if len(cmd.commands[0].args) != 2: + raise ValueError("'cd' supports only one argument") + newdir = cmd.commands[0].args[1] + # Update the cwd in the parent environment. + if os.path.isabs(newdir): + shenv.cwd = newdir + else: + shenv.cwd = os.path.realpath(os.path.join(shenv.cwd, newdir)) + # The cd builtin always succeeds. If the directory does not exist, the + # following Popen calls will fail instead. + return 0 + + # Handle "echo" as a builtin if it is not part of a pipeline. 
This greatly + # speeds up tests that construct input files by repeatedly echo-appending to + # a file. + # FIXME: Standardize on the builtin echo implementation. We can use a + # temporary file to sidestep blocking pipe write issues. + if cmd.commands[0].args[0] == 'echo' and len(cmd.commands) == 1: + output = executeBuiltinEcho(cmd.commands[0], shenv) + results.append(ShellCommandResult(cmd.commands[0], output, "", 0, + False)) + return 0 + + if cmd.commands[0].args[0] == 'export': + if len(cmd.commands) != 1: + raise ValueError("'export' cannot be part of a pipeline") + if len(cmd.commands[0].args) != 2: + raise ValueError("'export' supports only one argument") + updateEnv(shenv, cmd.commands[0].args) + return 0 + + if cmd.commands[0].args[0] == 'mkdir': + if len(cmd.commands) != 1: + raise InternalShellError(cmd.commands[0], "Unsupported: 'mkdir' " + "cannot be part of a pipeline") + cmdResult = executeBuiltinMkdir(cmd.commands[0], shenv) + results.append(cmdResult) + return cmdResult.exitCode + + if cmd.commands[0].args[0] == 'rm': + if len(cmd.commands) != 1: + raise InternalShellError(cmd.commands[0], "Unsupported: 'rm' " + "cannot be part of a pipeline") + cmdResult = executeBuiltinRm(cmd.commands[0], shenv) + results.append(cmdResult) + return cmdResult.exitCode + + if cmd.commands[0].args[0] == ':': + if len(cmd.commands) != 1: + raise InternalShellError(cmd.commands[0], "Unsupported: ':' " + "cannot be part of a pipeline") + results.append(ShellCommandResult(cmd.commands[0], '', '', 0, False)) + return 0; + procs = [] default_stdin = subprocess.PIPE stderrTempFiles = [] @@ -612,12 +646,6 @@ named_temp_files = [] builtin_commands = set(['cat', 'diff']) builtin_commands_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "builtin_commands") - inproc_builtins = {'cd': executeBuiltinCd, - 'export': executeBuiltinExport, - 'echo': executeBuiltinEcho, - 'mkdir': executeBuiltinMkdir, - 'rm': executeBuiltinRm, - ':': executeBuiltinColon} # To avoid 
deadlock, we use a single stderr stream for piped # output. This is null until we have seen some output using # stderr. @@ -635,27 +663,6 @@ raise InternalShellError(j, "Error: 'env' requires a subcommand") - # Handle in-process builtins. - # - # Handle "echo" as a builtin if it is not part of a pipeline. This - # greatly speeds up tests that construct input files by repeatedly - # echo-appending to a file. - # FIXME: Standardize on the builtin echo implementation. We can use a - # temporary file to sidestep blocking pipe write issues. - inproc_builtin = inproc_builtins.get(args[0], None) - if inproc_builtin and (args[0] != 'echo' or len(cmd.commands) == 1): - # env calling an in-process builtin is useless, so we take the safe - # approach of complaining. - if not cmd_shenv is shenv: - raise InternalShellError(j, "Error: 'env' cannot call '{}'" - .format(args[0])) - if len(cmd.commands) != 1: - raise InternalShellError(j, "Unsupported: '{}' cannot be part" - " of a pipeline".format(args[0])) - result = inproc_builtin(j, cmd_shenv) - results.append(result) - return result.exitCode - stdin, stdout, stderr = processRedirects(j, default_stdin, cmd_shenv, opened_files) Index: llvm/utils/lit/lit/display.py =================================================================== --- llvm/utils/lit/lit/display.py +++ llvm/utils/lit/lit/display.py @@ -1,45 +1,46 @@ import sys +import lit.ProgressBar def create_display(opts, tests, total_tests, workers): if opts.quiet: - return NopDisplay() + return NopProgressDisplay() of_total = (' of %d' % total_tests) if (tests != total_tests) else '' header = '-- Testing: %d%s tests, %d workers --' % (tests, of_total, workers) progress_bar = None if opts.succinct and opts.useProgressBar: - import lit.ProgressBar try: tc = lit.ProgressBar.TerminalController() progress_bar = lit.ProgressBar.ProgressBar(tc, header) - header = None except ValueError: + print(header) progress_bar = lit.ProgressBar.SimpleProgressBar('Testing: ') + else: + 
print(header) - return Display(opts, tests, header, progress_bar) + if progress_bar: + progress_bar.update(0, '') + return ProgressDisplay(opts, tests, progress_bar) -class NopDisplay(object): - def print_header(self): pass +class NopProgressDisplay(object): def update(self, test): pass - def clear(self): pass + def finish(self): pass - -class Display(object): - def __init__(self, opts, tests, header, progress_bar): +class ProgressDisplay(object): + def __init__(self, opts, numTests, progressBar): self.opts = opts - self.tests = tests - self.header = header - self.progress_bar = progress_bar + self.numTests = numTests + self.progressBar = progressBar self.completed = 0 - def print_header(self): - if self.header: - print(self.header) - if self.progress_bar: - self.progress_bar.update(0.0, '') + def finish(self): + if self.progressBar: + self.progressBar.clear() + elif self.opts.succinct: + sys.stdout.write('\n') def update(self, test): self.completed += 1 @@ -48,26 +49,22 @@ self.opts.showAllOutput or \ (not self.opts.quiet and not self.opts.succinct) if show_result: - if self.progress_bar: - self.progress_bar.clear() self.print_result(test) - if self.progress_bar: + if self.progressBar: if test.isFailure(): - self.progress_bar.barColor = 'RED' - percent = float(self.completed) / self.tests - self.progress_bar.update(percent, test.getFullName()) - - def clear(self): - if self.progress_bar: - self.progress_bar.clear() - sys.stdout.write('\n') + self.progressBar.barColor = 'RED' + percent = float(self.completed) / self.numTests + self.progressBar.update(percent, test.getFullName()) def print_result(self, test): + if self.progressBar: + self.progressBar.clear() + # Show the test result line. test_name = test.getFullName() print('%s: %s (%d of %d)' % (test.result.code.name, test_name, - self.completed, self.tests)) + self.completed, self.numTests)) # Show the test failure output, if requested. 
if (test.isFailure() and self.opts.showOutput) or \ Index: llvm/utils/lit/lit/main.py =================================================================== --- llvm/utils/lit/lit/main.py +++ llvm/utils/lit/lit/main.py @@ -179,7 +179,6 @@ run = lit.run.create_run(tests, litConfig, opts.numWorkers, progress_callback, opts.timeout) - display.print_header() try: elapsed = execute_in_tmp_dir(run, litConfig) except KeyboardInterrupt: @@ -189,9 +188,9 @@ # TODO(yln): change display to update when test starts, not when test completes # Ensure everything still works with SimpleProgressBar as well # finally: - # display.clear() + # display.finish() - display.clear() + display.finish() return elapsed def execute_in_tmp_dir(run, litConfig): Index: llvm/utils/lit/tests/shtest-env.py =================================================================== --- llvm/utils/lit/tests/shtest-env.py +++ llvm/utils/lit/tests/shtest-env.py @@ -7,7 +7,7 @@ # Make sure env commands are included in printed commands. -# CHECK: -- Testing: 13 tests{{.*}} +# CHECK: -- Testing: 7 tests{{.*}} # CHECK: FAIL: shtest-env :: env-args-last-is-assign.txt ({{[^)]*}}) # CHECK: Error: 'env' requires a subcommand @@ -25,52 +25,19 @@ # CHECK: Error: 'env' requires a subcommand # CHECK: error: command failed with exit status: {{.*}} -# CHECK: FAIL: shtest-env :: env-calls-cd.txt ({{[^)]*}}) -# CHECK: $ "env" "-u" "FOO" "BAR=3" "cd" "foobar" -# CHECK: Error: 'env' cannot call 'cd' -# CHECK: error: command failed with exit status: {{.*}} - -# CHECK: FAIL: shtest-env :: env-calls-colon.txt ({{[^)]*}}) -# CHECK: $ "env" "-u" "FOO" "BAR=3" ":" -# CHECK: Error: 'env' cannot call ':' -# CHECK: error: command failed with exit status: {{.*}} - -# CHECK: FAIL: shtest-env :: env-calls-echo.txt ({{[^)]*}}) -# CHECK: $ "env" "-u" "FOO" "BAR=3" "echo" "hello" "world" -# CHECK: Error: 'env' cannot call 'echo' -# CHECK: error: command failed with exit status: {{.*}} - -# CHECK: FAIL: shtest-env :: env-calls-export.txt 
({{[^)]*}}) -# CHECK: $ "env" "-u" "FOO" "BAR=3" "export" "BAZ=3" -# CHECK: Error: 'env' cannot call 'export' -# CHECK: error: command failed with exit status: {{.*}} - -# CHECK: FAIL: shtest-env :: env-calls-mkdir.txt ({{[^)]*}}) -# CHECK: $ "env" "-u" "FOO" "BAR=3" "mkdir" "foobar" -# CHECK: Error: 'env' cannot call 'mkdir' -# CHECK: error: command failed with exit status: {{.*}} - -# CHECK: FAIL: shtest-env :: env-calls-rm.txt ({{[^)]*}}) -# CHECK: $ "env" "-u" "FOO" "BAR=3" "rm" "foobar" -# CHECK: Error: 'env' cannot call 'rm' -# CHECK: error: command failed with exit status: {{.*}} - # CHECK: PASS: shtest-env :: env-u.txt ({{[^)]*}}) # CHECK: $ "{{[^"]*}}" "print_environment.py" # CHECK: $ "env" "-u" "FOO" "{{[^"]*}}" "print_environment.py" # CHECK: $ "env" "-u" "FOO" "-u" "BAR" "{{[^"]*}}" "print_environment.py" -# CHECK-NOT: ${{.*}}print_environment.py # CHECK: PASS: shtest-env :: env.txt ({{[^)]*}}) # CHECK: $ "env" "A_FOO=999" "{{[^"]*}}" "print_environment.py" # CHECK: $ "env" "A_FOO=1" "B_BAR=2" "C_OOF=3" "{{[^"]*}}" "print_environment.py" -# CHECK-NOT: ${{.*}}print_environment.py # CHECK: PASS: shtest-env :: mixed.txt ({{[^)]*}}) # CHECK: $ "env" "A_FOO=999" "-u" "FOO" "{{[^"]*}}" "print_environment.py" # CHECK: $ "env" "A_FOO=1" "-u" "FOO" "B_BAR=2" "-u" "BAR" "C_OOF=3" "{{[^"]*}}" "print_environment.py" -# CHECK-NOT: ${{.*}}print_environment.py # CHECK: Expected Passes : 3 -# CHECK: Unexpected Failures: 10 +# CHECK: Unexpected Failures: 4 # CHECK-NOT: {{.}} Index: llvm/utils/update_analyze_test_checks.py =================================================================== --- llvm/utils/update_analyze_test_checks.py +++ llvm/utils/update_analyze_test_checks.py @@ -146,7 +146,7 @@ for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs): common.build_function_body_dictionary( common.ANALYZE_FUNCTION_RE, common.scrub_body, [], - raw_tool_output, prefixes, func_dict, args.verbose, False) + raw_tool_output, prefixes, func_dict, 
args.verbose) is_in_function = False is_in_function_start = False Index: llvm/utils/update_cc_test_checks.py =================================================================== --- llvm/utils/update_cc_test_checks.py +++ llvm/utils/update_cc_test_checks.py @@ -117,18 +117,6 @@ print('Please specify --llvm-bin or --clang', file=sys.stderr) sys.exit(1) - # Determine the builtin includes directory so that we can update tests that - # depend on the builtin headers. See get_clang_builtin_include_dir() and - # use_clang() in llvm/utils/lit/lit/llvm/config.py. - try: - builtin_include_dir = subprocess.check_output( - [args.clang, '-print-file-name=include']).decode().strip() - SUBST['%clang_cc1'] = ['-cc1', '-internal-isystem', builtin_include_dir, - '-nostdsysteminc'] - except subprocess.CalledProcessError: - common.warn('Could not determine clang builtins directory, some tests ' - 'might not update correctly.') - if args.opt is None: if args.llvm_bin is None: args.opt = 'opt' @@ -172,7 +160,7 @@ if '-emit-llvm' in clang_args: common.build_function_body_dictionary( common.OPT_FUNCTION_RE, common.scrub_body, [], - raw_tool_output, prefixes, func_dict, args.verbose, False) + raw_tool_output, prefixes, func_dict, args.verbose) else: print('The clang command line should include -emit-llvm as asm tests ' 'are discouraged in Clang testsuite.', file=sys.stderr) Index: llvm/utils/update_test_checks.py =================================================================== --- llvm/utils/update_test_checks.py +++ llvm/utils/update_test_checks.py @@ -66,8 +66,6 @@ help='Only update test if it was already autogened') parser.add_argument('-p', '--preserve-names', action='store_true', help='Do not scrub IR names') - parser.add_argument('--function-signature', action='store_true', - help='Keep function signature information around for the check line') parser.add_argument('tests', nargs='+') args = parser.parse_args() @@ -157,8 +155,7 @@ raw_tool_output = 
common.invoke_tool(args.opt_binary, opt_args, test) common.build_function_body_dictionary( common.OPT_FUNCTION_RE, common.scrub_body, [], - raw_tool_output, prefixes, func_dict, args.verbose, - args.function_signature) + raw_tool_output, prefixes, func_dict, args.verbose) is_in_function = False is_in_function_start = False