From 6deeff463c3530a99faef70de6d1485bb2321a37 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 28 Dec 2023 06:57:13 -0800 Subject: [PATCH 01/33] Optimize resetAllRegistersState() --- src/coreclr/jit/lsra.cpp | 24 +++++++++++++++++------- src/coreclr/jit/lsra.h | 2 ++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 37d85e23b9150..ed2be41cb1aa1 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -308,6 +308,11 @@ regMaskTP LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInte return result; } +void LinearScan::clearAllNextIntervalRef() +{ + memset(nextIntervalRef, MaxLocation, AVAILABLE_REG_COUNT * sizeof(LsraLocation)); +} + void LinearScan::clearNextIntervalRef(regNumber reg, var_types regType) { nextIntervalRef[reg] = MaxLocation; @@ -321,6 +326,11 @@ void LinearScan::clearNextIntervalRef(regNumber reg, var_types regType) #endif } +void LinearScan::clearAllSpillCost() +{ + memset(spillCost, 0, AVAILABLE_REG_COUNT * sizeof(weight_t)); +} + void LinearScan::clearSpillCost(regNumber reg, var_types regType) { spillCost[reg] = 0; @@ -4180,17 +4190,17 @@ void LinearScan::resetAllRegistersState() assert(!enregisterLocalVars); // Just clear any constant registers and return. resetAvailableRegs(); + clearAllNextIntervalRef(); + clearAllSpillCost(); + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) { RegRecord* physRegRecord = getRegisterRecord(reg); +#ifdef DEBUG Interval* assignedInterval = physRegRecord->assignedInterval; - clearNextIntervalRef(reg, physRegRecord->registerType); - clearSpillCost(reg, physRegRecord->registerType); - if (assignedInterval != nullptr) - { - assert(assignedInterval->isConstant); - physRegRecord->assignedInterval = nullptr; - } + assert(assignedInterval == nullptr || assignedInterval->isConstant); +#endif + physRegRecord->assignedInterval = nullptr; } } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index a708662882a06..a4ce6ec07000e 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1777,9 +1777,11 @@ class LinearScan : public LinearScanInterface makeRegsAvailable(regMask); } + void clearAllNextIntervalRef(); void clearNextIntervalRef(regNumber reg, var_types regType); void updateNextIntervalRef(regNumber reg, Interval* interval); + void clearAllSpillCost(); void clearSpillCost(regNumber reg, var_types regType); void updateSpillCost(regNumber reg, Interval* interval); From 7ff5cd303fdc64437da05b8dbf29c6f1b8083dde Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 28 Dec 2023 07:57:56 -0800 Subject: [PATCH 02/33] Add assert about refType for minopts --- src/coreclr/jit/lsrabuild.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f3aaba0847f68..86856b7e7b89f 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -185,6 +185,11 @@ RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* t // the allocation table. currBuildNode = nullptr; newRP->rpNum = static_cast(refPositions.size() - 1); + if (!enregisterLocalVars) + { + assert(!((refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || (refType == RefTypeDummyDef) || + (refType == RefTypeExpUse))); + } #endif // DEBUG return newRP; } From 33852e42e56166945336c13d2fe3fc48c8ced81c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 09:09:59 -0800 Subject: [PATCH 03/33] Templatize processBlockEndAllocation() --- src/coreclr/jit/lsra.cpp | 3 ++- src/coreclr/jit/lsra.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index ed2be41cb1aa1..1fa77db5ed99a 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3914,12 +3914,13 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) // Calls processBlockEndLocations() to set the outVarToRegMap, then gets the next block, // and sets the inVarToRegMap appropriately. // +template void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock) { assert(currentBlock != nullptr); markBlockVisited(currentBlock); - if (enregisterLocalVars) + if (localVarsEnregistered) { processBlockEndLocations(currentBlock); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index a4ce6ec07000e..8b7ddcb863a7e 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1001,6 +1001,7 @@ class LinearScan : public LinearScanInterface // Update allocations at start/end of block void unassignIntervalBlockStart(RegRecord* regRecord, VarToRegMap inVarToRegMap); + template void processBlockEndAllocation(BasicBlock* current); // Record variable locations at start/end of block From d42e5dae280f862fd9e7d3b900c912be67ae351c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 09:18:02 -0800 Subject: [PATCH 04/33] Add resetMinOpts() --- src/coreclr/jit/lsra.cpp | 14 ++++++++++++++ src/coreclr/jit/lsra.h | 1 + 2 files changed, 15 insertions(+) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 1fa77db5ed99a..baf0dd8b13805 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -11550,6 +11550,20 @@ LinearScan::RegisterSelection::RegisterSelection(LinearScan* linearScan) #endif // DEBUG } +// ---------------------------------------------------------- +// reset: Resets the values of all the fields used for register selection. +// +void LinearScan::RegisterSelection::resetMinOpts(Interval* interval, RefPosition* refPos) +{ + currentInterval = interval; + refPosition = refPos; + + regType = linearScan->getRegisterType(currentInterval, refPosition); + candidates = refPosition->registerAssignment; + freeCandidates = RBM_NONE; + found = false; +} + // ---------------------------------------------------------- // reset: Resets the values of all the fields used for register selection. // diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 8b7ddcb863a7e..5ec264def8b57 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1346,6 +1346,7 @@ class LinearScan : public LinearScanInterface bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate); FORCEINLINE void calculateCoversSets(); FORCEINLINE void reset(Interval* interval, RefPosition* refPosition); + FORCEINLINE void resetMinOpts(Interval* interval, RefPosition* refPosition); #define REG_SEL_DEF(stat, value, shortname, orderSeqId) FORCEINLINE void try_##stat(); #define BUSY_REG_SEL_DEF(stat, value, shortname, orderSeqId) REG_SEL_DEF(stat, value, shortname, orderSeqId) From 0d0f27f18f368e85c28a48e03d605512f3471a42 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 09:20:21 -0800 Subject: [PATCH 05/33] Add selectMinOpts() --- src/coreclr/jit/lsra.cpp | 178 +++++++++++++++++++++++++++++++++++++++ src/coreclr/jit/lsra.h | 3 + 2 files changed, 181 insertions(+) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index baf0dd8b13805..d7b8860a1da08 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12724,6 +12724,184 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, } calculateCoversSets(); +// ---------------------------------------------------------- +// selectMinOpts: For given `currentInterval` and `refPosition`, selects a register to be assigned. +// +// Arguments: +// currentInterval - Current interval for which register needs to be selected. +// refPosition - Refposition within the interval for which register needs to be selected. +// +// Return Values: +// Register bit selected (a single register) and REG_NA if no register was selected. +// +regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) +{ +#ifdef DEBUG + *registerScore = NONE; +#endif + +#ifdef TARGET_ARM64 + assert(!refPosition->needsConsecutive); +#endif + + resetMinOpts(currentInterval, refPosition); + + // process data-structures + if (RefTypeIsDef(refPosition->refType)) + { + RefPosition* nextRefPos = refPosition->nextRefPosition; + if (currentInterval->hasConflictingDefUse) + { + linearScan->resolveConflictingDefAndUse(currentInterval, refPosition); + candidates = refPosition->registerAssignment; + } + // Otherwise, check for the case of a fixed-reg def of a reg that will be killed before the + // use, or interferes at the point of use (which shouldn't happen, but Lower doesn't mark + // the contained nodes as interfering). + // Note that we may have a ParamDef RefPosition that is marked isFixedRegRef, but which + // has had its registerAssignment changed to no longer be a single register. + else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) && + !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment)) + { + regNumber defReg = refPosition->assignedReg(); + RegRecord* defRegRecord = linearScan->getRegisterRecord(defReg); + + RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition; + assert(currFixedRegRefPosition != nullptr && + currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation); + + // If there is another fixed reference to this register before the use, change the candidates + // on this RefPosition to include that of nextRefPos. + RefPosition* nextFixedRegRefPosition = defRegRecord->getNextRefPosition(); + if (nextFixedRegRefPosition != nullptr && + nextFixedRegRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) + { + candidates |= nextRefPos->registerAssignment; + } + } + } + +#ifdef DEBUG + candidates = linearScan->stressLimitRegs(refPosition, candidates); +#endif + assert(candidates != RBM_NONE); + + bool avoidByteRegs = false; +#ifdef TARGET_X86 + if ((relatedPreferences & ~RBM_BYTE_REGS) != RBM_NONE) + { + avoidByteRegs = true; + } +#endif + + // Is this a fixedReg? + regMaskTP fixedRegMask = RBM_NONE; + if (refPosition->isFixedRegRef) + { + assert(genMaxOneBit(refPosition->registerAssignment)); + if (candidates == refPosition->registerAssignment) + { + found = true; + foundRegBit = candidates; + return candidates; + } + fixedRegMask = refPosition->registerAssignment; + } + + // Eliminate candidates that are in-use or busy. + // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. + // When we allocate for USE, we see that the register is busy at current location + // and we end up with that candidate is no longer available. + regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; + candidates &= ~busyRegs; + +#ifdef TARGET_ARM + // For TYP_DOUBLE on ARM, we can only use an even floating-point register for which the odd half + // is also available. Thus, for every busyReg that is an odd floating-point register, we need to + // remove from candidates the corresponding even floating-point register. For example, if busyRegs + // contains `f3`, we need to remove `f2` from the candidates for a double register interval. The + // clause below creates a mask to do this. + if (currentInterval->registerType == TYP_DOUBLE) + { + candidates &= ~((busyRegs & RBM_ALLDOUBLE_HIGH) >> 1); + } +#endif // TARGET_ARM + + // Also eliminate as busy any register with a conflicting fixed reference at this or + // the next location. + // Note that this will eliminate the fixedReg, if any, but we'll add it back below. + regMaskTP checkConflictMask = candidates & linearScan->fixedRegs; + while (checkConflictMask != RBM_NONE) + { + regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); + regMaskTP checkConflictBit = genRegMask(checkConflictReg); + checkConflictMask ^= checkConflictBit; + + LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; + + if ((checkConflictLocation == refPosition->nodeLocation) || + (refPosition->delayRegFree && (checkConflictLocation == (refPosition->nodeLocation + 1)))) + { + candidates &= ~checkConflictBit; + } + } + candidates |= fixedRegMask; + found = isSingleRegister(candidates); + + // TODO-Cleanup: Previously, the "reverseSelect" stress mode reversed the order of the heuristics. + // It needs to be re-engineered with this refactoring. + // In non-debug builds, this will simply get optimized away + bool reverseSelect = false; +#ifdef DEBUG + reverseSelect = linearScan->doReverseSelect(); +#endif // DEBUG + + if (!found) + { + if (candidates == RBM_NONE) + { + assert(refPosition->RegOptional()); + currentInterval->assignedReg = nullptr; + return RBM_NONE; + } + + freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + + try_REG_ORDER(); + +#ifdef DEBUG +#if TRACK_LSRA_STATS + INTRACK_STATS_IF(found, linearScan->updateLsraStat(linearScan->getLsraStatFromScore(RegisterScore::REG_ORDER), + refPosition->bbNum)); +#endif // TRACK_LSRA_STATS + if (found) + { + *registerScore = RegisterScore::REG_ORDER; + } +#endif // DEBUG + } + if (!found) + { + if (refPosition->RegOptional() || !refPosition->IsActualRef()) + { + // We won't spill if this refPosition is not an actual ref or is RegOptional + currentInterval->assignedReg = nullptr; + foundRegBit = RBM_NONE; + return RBM_NONE; + } + else + { + try_REG_NUM(); +#ifdef DEBUG +#if TRACK_LSRA_STATS + INTRACK_STATS_IF(found, linearScan->updateLsraStat(linearScan->getLsraStatFromScore(RegisterScore::REG_NUM), + refPosition->bbNum)); +#endif // TRACK_LSRA_STATS + *registerScore = RegisterScore::REG_NUM; +#endif // DEBUG + } + } assert(found && isSingleRegister(candidates)); foundRegBit = candidates; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5ec264def8b57..ab926140a8f42 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1262,6 +1262,9 @@ class LinearScan : public LinearScanInterface FORCEINLINE regMaskTP select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + FORCEINLINE regMaskTP selectMinOpts(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + // If the register is from unassigned set such that it was not already // assigned to the current interval FORCEINLINE bool foundUnassignedReg() From bc00b94e0f5b58474297e737af4e4da491dc1a01 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 09:23:18 -0800 Subject: [PATCH 06/33] Add calculateUnassignedSet() --- src/coreclr/jit/lsra.cpp | 34 +++++++++++++++++++++++++++++++++- src/coreclr/jit/lsra.h | 1 + 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d7b8860a1da08..43f3859aaa0bc 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12192,6 +12192,32 @@ void LinearScan::RegisterSelection::try_REG_NUM() found = applySingleRegSelection(REG_NUM, genFindLowestBit(candidates)); } +// ---------------------------------------------------------- +// calculateUnassignedSets: Calculate the necessary unassigned sets. +// +void LinearScan::RegisterSelection::calculateUnassignedSets() // TODO: Seperate the calculation of unassigned set +{ + if (freeCandidates == RBM_NONE || coversSetsCalculated) + { + return; + } + + regMaskTP coversCandidates = candidates; + for (; coversCandidates != RBM_NONE;) + { + regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); + regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum); + coversCandidates ^= coversCandidateBit; + + // The register is considered unassigned if it has no assignedInterval, OR + // if its next reference is beyond the range of this interval. + if (linearScan->nextIntervalRef[coversCandidateRegNum] > lastLocation) + { + unassignedSet |= coversCandidateBit; + } + } +} + // ---------------------------------------------------------- // calculateCoversSets: Calculate the necessary covers set registers to be used // for heuristics lke COVERS, COVERS_RELATED, COVERS_FULL. @@ -12723,7 +12749,13 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, return RBM_NONE; } - calculateCoversSets(); + calculateUnassignedSets(); + + assert(found && isSingleRegister(candidates)); + foundRegBit = candidates; + return candidates; +} + // ---------------------------------------------------------- // selectMinOpts: For given `currentInterval` and `refPosition`, selects a register to be assigned. // diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ab926140a8f42..e10914c8a32bc 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1348,6 +1348,7 @@ class LinearScan : public LinearScanInterface bool applySelection(int selectionScore, regMaskTP selectionCandidates); bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate); FORCEINLINE void calculateCoversSets(); + FORCEINLINE void calculateUnassignedSets(); FORCEINLINE void reset(Interval* interval, RefPosition* refPosition); FORCEINLINE void resetMinOpts(Interval* interval, RefPosition* refPosition); From 62af8b46b0230ae0825690871509c5ca0b10e277 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 09:24:57 -0800 Subject: [PATCH 07/33] Call spillCostMinOpts() for minopts --- src/coreclr/jit/lsra.cpp | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 43f3859aaa0bc..3e06ae3888a3f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2945,16 +2945,20 @@ template regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - regMaskTP foundRegBit = + regNumber foundReg; + regMaskTP foundRegBit; + RegRecord* availablePhysRegRecord; + if (enregisterLocalVars || needsConsecutiveRegisters) + { + foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); - if (foundRegBit == RBM_NONE) { return REG_NA; } - regNumber foundReg = genRegNumFromMask(foundRegBit); - RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); + foundReg = genRegNumFromMask(foundRegBit); + availablePhysRegRecord = getRegisterRecord(foundReg); Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) @@ -3009,6 +3013,28 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, } } } + } + else + { + // For minopts, just unassign `foundReg` from existing interval + foundRegBit = + regSelector->selectMinOpts(currentInterval, refPosition DEBUG_ARG(registerScore)); + foundReg = genRegNumFromMask(foundRegBit); + + if (foundRegBit == RBM_NONE) + { + return REG_NA; + } + + availablePhysRegRecord = getRegisterRecord(foundReg); + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && + isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) + { + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + } + } + assignPhysReg(availablePhysRegRecord, currentInterval); refPosition->registerAssignment = foundRegBit; return foundReg; From df18666d1c815323e78e1cfae5b7dd071a22317d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 09:26:09 -0800 Subject: [PATCH 08/33] Add enregisterLocalVars check in setIntervalAsSpilled() --- src/coreclr/jit/lsra.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 3e06ae3888a3f..997bba40b9c30 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3438,6 +3438,12 @@ void LinearScan::setIntervalAsSplit(Interval* interval) // void LinearScan::setIntervalAsSpilled(Interval* interval) { + if (!enregisterLocalVars) + { + interval->isSpilled = true; + return; + } + #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (interval->isUpperVector) { From f24ef56af238c63aa06c6b91e7d4848c839ba007 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 10:18:26 -0800 Subject: [PATCH 09/33] optimize few paths in resolveRegisters() --- src/coreclr/jit/lsra.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 997bba40b9c30..a367a44801b04 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -7389,6 +7389,11 @@ void LinearScan::resolveRegisters() // In those cases, treeNode will be nullptr. if (treeNode == nullptr) { + if (localVarsEnregistered) + { + continue; + } + // This is either a use, a dead def, or a field of a struct Interval* interval = currentRefPosition->getInterval(); assert(currentRefPosition->refType == RefTypeUse || @@ -7422,7 +7427,8 @@ void LinearScan::resolveRegisters() { writeRegisters(currentRefPosition, treeNode); - if (treeNode->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR) && currentRefPosition->getInterval()->isLocalVar) + if (localVarsEnregistered && treeNode->OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR) && + currentRefPosition->getInterval()->isLocalVar) { resolveLocalRef(block, treeNode->AsLclVar(), currentRefPosition); } From edd08155bad72e3df95f03e83f6ce84a94678022 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 10:20:05 -0800 Subject: [PATCH 10/33] optimize few paths in allocateRegisters() --- src/coreclr/jit/lsra.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index a367a44801b04..0b191687b97d3 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4801,8 +4801,8 @@ void LinearScan::allocateRegisters() } } - if (enregisterLocalVars) - { + resetRegState(); + #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); unsigned largeVectorVarIndex = 0; @@ -4812,9 +4812,7 @@ void LinearScan::allocateRegisters() lclVarInterval->isPartiallySpilled = false; } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - } - resetRegState(); for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) { RegRecord* physRegRecord = getRegisterRecord(reg); @@ -5152,7 +5150,7 @@ void LinearScan::allocateRegisters() } else { - processBlockEndAllocation(currentBlock); + processBlockEndAllocation(currentBlock); currentBlock = moveToNextBlock(); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock)); } @@ -5200,10 +5198,10 @@ void LinearScan::allocateRegisters() } regsInUseThisLocation |= currentRefPosition.registerAssignment; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); - continue; } - if (refType == RefTypeKill) + else { + assert(refType == RefTypeKill); if (assignedInterval != nullptr) { unassignPhysReg(regRecord, assignedInterval->recentRefPosition); @@ -5212,8 +5210,8 @@ void LinearScan::allocateRegisters() } clearRegBusyUntilKill(regRecord->regNum); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum)); - continue; } + continue; } // If this is an exposed use, do nothing - this is merely a placeholder to attempt to From a55144bc413acc48c0463606ab0ab4fcbb787bcf Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 10:31:54 -0800 Subject: [PATCH 11/33] Adjust RegisterSelection creation --- src/coreclr/jit/lsra.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 0b191687b97d3..280a0ac888e97 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -733,7 +733,6 @@ LinearScan::LinearScan(Compiler* theCompiler) } #endif // TARGET_XARCH - regSelector = new (theCompiler, CMK_LSRA) RegisterSelection(this); firstColdLoc = MaxLocation; #ifdef DEBUG @@ -772,6 +771,9 @@ LinearScan::LinearScan(Compiler* theCompiler) // set won't be recomputed until after Lowering (and this constructor is called prior to Lowering), // so we don't want to check that yet. enregisterLocalVars = compiler->compEnregLocals(); + + regSelector = new (theCompiler, CMK_LSRA) RegisterSelection(this); + #ifdef TARGET_ARM64 availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) From b2bea014232604c1263abd739508184c4c3c189e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 10:32:13 -0800 Subject: [PATCH 12/33] Add allocateRegistersForMinOpts() --- src/coreclr/jit/lsra.cpp | 1483 +++++++++++++++++++++++++++++++------- src/coreclr/jit/lsra.h | 7 +- 2 files changed, 1225 insertions(+), 265 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 280a0ac888e97..5b199c6bc9636 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1402,12 +1402,26 @@ PhaseStatus LinearScan::doLinearScan() #ifdef TARGET_ARM64 if (compiler->info.compNeedsConsecutiveRegisters) { - allocateRegisters(); + if (enregisterLocalVars) + { + allocateRegisters(); + } + else + { + allocateRegistersForMinOpt(); + } } else #endif // TARGET_ARM64 { - allocateRegisters(); + if (enregisterLocalVars) + { + allocateRegisters(); + } + else + { + allocateRegistersForMinOpt(); + } } allocationPassComplete = true; @@ -1909,7 +1923,7 @@ void LinearScan::identifyCandidates() // the same register assignment throughout varDsc->lvRegister = false; - if (!localVarsEnregistered || !isRegCandidate(varDsc)) + if (!isRegCandidate(varDsc)) { varDsc->lvLRACandidate = 0; if (varDsc->lvTracked) @@ -2953,69 +2967,69 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, if (enregisterLocalVars || needsConsecutiveRegisters) { foundRegBit = - regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); - if (foundRegBit == RBM_NONE) - { - return REG_NA; - } + regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); + if (foundRegBit == RBM_NONE) + { + return REG_NA; + } foundReg = genRegNumFromMask(foundRegBit); availablePhysRegRecord = getRegisterRecord(foundReg); - Interval* assignedInterval = availablePhysRegRecord->assignedInterval; - if ((assignedInterval != currentInterval) && - isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) - { - if (regSelector->isSpilling()) + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && + isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) { - // We're spilling. - CLANG_FORMAT_COMMENT_ANCHOR; + if (regSelector->isSpilling()) + { + // We're spilling. + CLANG_FORMAT_COMMENT_ANCHOR; #ifdef TARGET_ARM - if (currentInterval->registerType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); - unassignDoublePhysReg(availablePhysRegRecord); - } - else if (assignedInterval->registerType == TYP_DOUBLE) - { - // Make sure we spill both halves of the double register. - assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); - unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); - } - else + if (currentInterval->registerType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); + unassignDoublePhysReg(availablePhysRegRecord); + } + else if (assignedInterval->registerType == TYP_DOUBLE) + { + // Make sure we spill both halves of the double register. + assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); + unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); + } + else #endif - { - unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); - } - } - else - { - // If we considered this "unassigned" because this interval's lifetime ends before - // the next ref, remember it. - // For historical reasons (due to former short-circuiting of this case), if we're reassigning - // the current interval to a previous assignment, we don't remember the previous interval. - // Note that we need to compute this condition before calling unassignPhysReg, which wil reset - // assignedInterval->physReg. - bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && - (assignedInterval->physReg == foundReg); - unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); - if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) - { - assert(assignedInterval->isConstant); - refPosition->treeNode->SetReuseRegVal(); - } - else if (wasAssigned) - { - updatePreviousInterval(availablePhysRegRecord, - assignedInterval ARM_ARG(assignedInterval->registerType)); + { + unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); + } } else { - assert(!regSelector->isConstAvailable()); + // If we considered this "unassigned" because this interval's lifetime ends before + // the next ref, remember it. + // For historical reasons (due to former short-circuiting of this case), if we're reassigning + // the current interval to a previous assignment, we don't remember the previous interval. + // Note that we need to compute this condition before calling unassignPhysReg, which wil reset + // assignedInterval->physReg. + bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && + (assignedInterval->physReg == foundReg); + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) + { + assert(assignedInterval->isConstant); + refPosition->treeNode->SetReuseRegVal(); + } + else if (wasAssigned) + { + updatePreviousInterval(availablePhysRegRecord, + assignedInterval ARM_ARG(assignedInterval->registerType)); + } + else + { + assert(!regSelector->isConstAvailable()); + } } } } - } else { // For minopts, just unassign `foundReg` from existing interval @@ -4538,241 +4552,1177 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) } if (assignedInterval->registerType == TYP_DOUBLE) { - // Skip next float register, because we already addressed a double register - assert(genIsValidDoubleReg(reg)); - reg = REG_NEXT(reg); - makeRegAvailable(reg, physRegRecord->registerType); + // Skip next float register, because we already addressed a double register + assert(genIsValidDoubleReg(reg)); + reg = REG_NEXT(reg); + makeRegAvailable(reg, physRegRecord->registerType); + } + } + } + else + { + Interval* assignedInterval = physRegRecord->assignedInterval; + + if (assignedInterval != nullptr && assignedInterval->registerType == TYP_DOUBLE) + { + // Skip next float register, because we already addressed a double register + assert(genIsValidDoubleReg(reg)); + reg = REG_NEXT(reg); + } + } + } +#else + regMaskTP deadCandidates = ~liveRegs; + + // Only focus on actual registers present + deadCandidates &= actualRegistersMask; + + while (deadCandidates != RBM_NONE) + { + regNumber reg = genFirstRegNumFromMaskAndToggle(deadCandidates); + RegRecord* physRegRecord = getRegisterRecord(reg); + + makeRegAvailable(reg, physRegRecord->registerType); + Interval* assignedInterval = physRegRecord->assignedInterval; + + if (assignedInterval != nullptr) + { + assert(assignedInterval->isLocalVar || assignedInterval->isConstant || assignedInterval->IsUpperVector()); + + if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord) + { + assignedInterval->isActive = false; + if (assignedInterval->getNextRefPosition() == nullptr) + { + unassignPhysReg(physRegRecord, nullptr); + } + if (!assignedInterval->IsUpperVector()) + { + inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK; + } + } + else + { + // This interval may still be active, but was in another register in an + // intervening block. + clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType)); + } + } + } +#endif // TARGET_ARM +} + +//------------------------------------------------------------------------ +// processBlockEndLocations: Record the variables occupying registers after completing the current block. +// +// Arguments: +// currentBlock - the block we have just completed. +// +// Return Value: +// None +// +// Notes: +// This must be called both during the allocation and resolution (write-back) phases. +// This is because we need to have the outVarToRegMap locations in order to set the locations +// at successor blocks during allocation time, but if lclVars are spilled after a block has been +// completed, we need to record the REG_STK location for those variables at resolution time. +// +void LinearScan::processBlockEndLocations(BasicBlock* currentBlock) +{ + assert(currentBlock != nullptr && currentBlock->bbNum == curBBNum); + VarToRegMap outVarToRegMap = getOutVarToRegMap(curBBNum); + + VarSetOps::AssignNoCopy(compiler, currentLiveVars, + VarSetOps::Intersection(compiler, registerCandidateVars, currentBlock->bbLiveOut)); +#ifdef DEBUG + if (getLsraExtendLifeTimes()) + { + VarSetOps::Assign(compiler, currentLiveVars, registerCandidateVars); + } +#endif // DEBUG + VarSetOps::Iter iter(compiler, currentLiveVars); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + Interval* interval = getIntervalForLocalVar(varIndex); + if (interval->isActive) + { + assert(interval->physReg != REG_NA && interval->physReg != REG_STK); + setVarReg(outVarToRegMap, varIndex, interval->physReg); + } + else + { + outVarToRegMap[varIndex] = REG_STK; + } +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + // Ensure that we have no partially-spilled large vector locals. + assert(!Compiler::varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled); +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB)); +} + +#ifdef DEBUG +void LinearScan::dumpRefPositions(const char* str) +{ + printf("------------\n"); + printf("REFPOSITIONS %s: \n", str); + printf("------------\n"); + for (RefPosition& refPos : refPositions) + { + refPos.dump(this); + } +} +#endif // DEBUG + +//------------------------------------------------------------------------ +// LinearScan::makeRegisterInactive: Make the interval currently assigned to +// a register inactive. +// +// Arguments: +// physRegRecord - the RegRecord for the register +// +// Return Value: +// None. +// +// Notes: +// It may be that the RegRecord has already been freed, e.g. due to a kill, +// or it may be that the register was a copyReg, so is not the assigned register +// of the Interval currently occupying the register, in which case this method has no effect. +// +void LinearScan::makeRegisterInactive(RegRecord* physRegRecord) +{ + Interval* assignedInterval = physRegRecord->assignedInterval; + // It may have already been freed by a "Kill" + if ((assignedInterval != nullptr) && (assignedInterval->physReg == physRegRecord->regNum)) + { + assignedInterval->isActive = false; + if (assignedInterval->isConstant) + { + clearNextIntervalRef(physRegRecord->regNum, assignedInterval->registerType); + } + } +} + +//------------------------------------------------------------------------ +// LinearScan::freeRegister: Make a register available for use +// +// Arguments: +// physRegRecord - the RegRecord for the register to be freed. +// +// Return Value: +// None. +// +// Assumptions: +// None. +// It may be that the RegRecord has already been freed, e.g. due to a kill, +// in which case this method has no effect. +// +// Notes: +// If there is currently an Interval assigned to this register, and it has +// more references (i.e. this is a local last-use, but more uses and/or +// defs remain), it will remain assigned to the physRegRecord. However, since +// it is marked inactive, the register will be available, albeit less desirable +// to allocate. +// +void LinearScan::freeRegister(RegRecord* physRegRecord) +{ + Interval* assignedInterval = physRegRecord->assignedInterval; + makeRegAvailable(physRegRecord->regNum, physRegRecord->registerType); + clearSpillCost(physRegRecord->regNum, physRegRecord->registerType); + makeRegisterInactive(physRegRecord); + + if (assignedInterval != nullptr) + { + // TODO: Under the following conditions we should be just putting it in regsToMakeInactive + // not regsToFree. + // + // We don't unassign in the following conditions: + // - If this is a constant node, that we may encounter again, OR + // - If its recent RefPosition is not a last-use and its next RefPosition is non-null. + // - If there are no more RefPositions, or the next + // one is a def. Note that the latter condition doesn't actually ensure that + // there aren't subsequent uses that could be reached by a value in the assigned + // register, but is merely a heuristic to avoid tying up the register (or using + // it when it's non-optimal). A better alternative would be to use SSA, so that + // we wouldn't unnecessarily link separate live ranges to the same register. + // + RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); + if (!assignedInterval->isConstant && (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))) + { +#ifdef TARGET_ARM + assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum)); +#endif // TARGET_ARM + unassignPhysReg(physRegRecord, nullptr); + } + } +} + +//------------------------------------------------------------------------ +// LinearScan::freeRegisters: Free the registers in 'regsToFree' +// +// Arguments: +// regsToFree - the mask of registers to free +// +void LinearScan::freeRegisters(regMaskTP regsToFree) +{ + if (regsToFree == RBM_NONE) + { + return; + } + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS)); + makeRegsAvailable(regsToFree); + while (regsToFree != RBM_NONE) + { + regNumber nextReg = genFirstRegNumFromMaskAndToggle(regsToFree); + + RegRecord* regRecord = getRegisterRecord(nextReg); +#ifdef TARGET_ARM + if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE)) + { + assert(genIsValidDoubleReg(nextReg)); + regsToFree ^= genRegMask(regNumber(nextReg + 1)); + } +#endif + freeRegister(regRecord); + } +} + +//------------------------------------------------------------------------ +// LinearScan::allocateRegistersForMinOpt: Perform the actual register allocation for MinOpts by iterating over +// all of the previously constructed Intervals +// +#ifdef TARGET_ARM64 +template +#endif +void LinearScan::allocateRegistersForMinOpt() +{ + JITDUMP("*************** In LinearScan::allocateRegistersForMinOpt()\n"); + DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegistersForMinOpt")); + + // at start, nothing is active except for register args + for (Interval& interval : intervals) + { + Interval* currentInterval = &interval; + currentInterval->recentRefPosition = nullptr; + assert(!currentInterval->isActive); + } + + resetRegState(); + clearAllNextIntervalRef(); + clearAllSpillCost(); + + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + { + RegRecord* physRegRecord = getRegisterRecord(reg); + physRegRecord->recentRefPosition = nullptr; + updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition); + assert(physRegRecord->assignedInterval == nullptr); + } + +#ifdef DEBUG + if (VERBOSE) + { + dumpRefPositions("BEFORE ALLOCATION"); + dumpVarRefPositions("BEFORE ALLOCATION"); + + printf("\n\nAllocating Registers\n" + "--------------------\n"); + // Start with a small set of commonly used registers, so that we don't keep having to print a new title. + // Include all the arg regs, as they may already have values assigned to them. + registersToDump = LsraLimitSmallIntSet | LsraLimitSmallFPSet | RBM_ARG_REGS; + dumpRegRecordHeader(); + // Now print an empty "RefPosition", since we complete the dump of the regs at the beginning of the loop. + printf(indentFormat, ""); + } +#endif // DEBUG + + BasicBlock* currentBlock = nullptr; + + LsraLocation prevLocation = MinLocation; + regMaskTP regsToFree = RBM_NONE; + regMaskTP delayRegsToFree = RBM_NONE; + regMaskTP regsToMakeInactive = RBM_NONE; + regMaskTP delayRegsToMakeInactive = RBM_NONE; + regMaskTP copyRegsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; + regsInUseNextLocation = RBM_NONE; + + // This is the most recent RefPosition for which a register was allocated + // - currently only used for DEBUG but maintained in non-debug, for clarity of code + // (and will be optimized away because in non-debug spillAlways() unconditionally returns false) + RefPosition* lastAllocatedRefPosition = nullptr; + + bool handledBlockEnd = false; + + for (RefPosition& currentRefPosition : refPositions) + { + RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; + + // TODO: Can we combine this with the freeing of registers below? It might + // mess with the dump, since this was previously being done before the call below + // to dumpRegRecords. + regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive); + while (tempRegsToMakeInactive != RBM_NONE) + { + regNumber nextReg = genFirstRegNumFromMaskAndToggle(tempRegsToMakeInactive); + RegRecord* regRecord = getRegisterRecord(nextReg); + clearSpillCost(regRecord->regNum, regRecord->registerType); + makeRegisterInactive(regRecord); + } + if (currentRefPosition.nodeLocation > prevLocation) + { + makeRegsAvailable(regsToMakeInactive); + // TODO: Clean this up. We need to make the delayRegs inactive as well, but don't want + // to mark them as free yet. + regsToMakeInactive |= delayRegsToMakeInactive; + regsToMakeInactive = delayRegsToMakeInactive; + delayRegsToMakeInactive = RBM_NONE; + } + +#ifdef DEBUG + // Set the activeRefPosition to null until we're done with any boundary handling. + activeRefPosition = nullptr; + if (VERBOSE) + { + // We're really dumping the RegRecords "after" the previous RefPosition, but it's more convenient + // to do this here, since there are a number of "continue"s in this loop. + dumpRegRecords(); + } +#endif // DEBUG + + // This is the previousRefPosition of the current Referent, if any + RefPosition* previousRefPosition = nullptr; + + Interval* currentInterval = nullptr; + Referenceable* currentReferent = nullptr; + RefType refType = currentRefPosition.refType; + assert(!((refType == RefTypeDummyDef) || (refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || (refType == RefTypeExpUse))); + + currentReferent = currentRefPosition.referent; + + if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->IsPhysRegRef() && + !lastAllocatedRefPosition->getInterval()->isInternal && + (!lastAllocatedRefPosition->RegOptional() || (lastAllocatedRefPosition->registerAssignment != RBM_NONE)) && + (RefTypeIsDef(lastAllocatedRefPosition->refType))) + { + assert(lastAllocatedRefPosition->registerAssignment != RBM_NONE); + RegRecord* regRecord = lastAllocatedRefPosition->getInterval()->assignedReg; + + INDEBUG(activeRefPosition = lastAllocatedRefPosition); + unassignPhysReg(regRecord, lastAllocatedRefPosition); + INDEBUG(activeRefPosition = nullptr); + + // Now set lastAllocatedRefPosition to null, so that we don't try to spill it again + lastAllocatedRefPosition = nullptr; + } + + // We wait to free any registers until we've completed all the + // uses for the current node. + // This avoids reusing registers too soon. + // We free before the last true def (after all the uses & internal + // registers), and then again at the beginning of the next node. + // This is made easier by assigning two LsraLocations per node - one + // for all the uses, internal registers & all but the last def, and + // another for the final def (if any). + + LsraLocation currentLocation = currentRefPosition.nodeLocation; + + // Free at a new location. + if (currentLocation > prevLocation) + { + // CopyRegs are simply made available - we don't want to make the associated interval inactive. + makeRegsAvailable(copyRegsToFree); + copyRegsToFree = RBM_NONE; + regsInUseThisLocation = regsInUseNextLocation; + regsInUseNextLocation = RBM_NONE; +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister) + { + consecutiveRegsInUseThisLocation = RBM_NONE; + } +#endif + if ((regsToFree | delayRegsToFree) != RBM_NONE) + { + freeRegisters(regsToFree); + if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE)) + { + // We should never see a delayReg that is delayed until a Location that has no RefPosition + // (that would be the RefPosition that it was supposed to interfere with). + assert(!"Found a delayRegFree associated with Location with no reference"); + // However, to be cautious for the Release build case, we will free them. + freeRegisters(delayRegsToFree); + delayRegsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; + } + regsToFree = delayRegsToFree; + delayRegsToFree = RBM_NONE; + +#ifdef DEBUG + // Validate the current state just after we've freed the registers. This ensures that any pending + // freed registers will have had their state updated to reflect the intervals they were holding. + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + { + regMaskTP regMask = genRegMask(reg); + // If this isn't available or if it's still waiting to be freed (i.e. it was in + // delayRegsToFree and so now it's in regsToFree), then skip it. + if ((regMask & allAvailableRegs & ~regsToFree) == RBM_NONE) + { + continue; + } + RegRecord* physRegRecord = getRegisterRecord(reg); + Interval* assignedInterval = physRegRecord->assignedInterval; + if (assignedInterval != nullptr) + { + bool isAssignedReg = (assignedInterval->physReg == reg); + RefPosition* recentRefPosition = assignedInterval->recentRefPosition; + // If we have a copyReg or a moveReg, we might have assigned this register to an Interval, + // but that isn't considered its assignedReg. + if (recentRefPosition != nullptr) + { + // For copyReg or moveReg, we don't have anything further to assert. + if (recentRefPosition->copyReg || recentRefPosition->moveReg) + { + continue; + } + assert(assignedInterval->isConstant == isRegConstant(reg, assignedInterval->registerType)); + if (assignedInterval->isActive) + { + // If this is not the register most recently allocated, it must be from a copyReg, + // it was placed there by the inVarToRegMap or it might be one of the upper vector + // save/restore refPosition. + // In either case it must be a lclVar. + + if (!isAssignedToInterval(assignedInterval, physRegRecord)) + { + // We'd like to assert that this was either set by the inVarToRegMap, or by + // a copyReg, but we can't traverse backward to check for a copyReg, because + // we only have recentRefPosition, and there may be a previous RefPosition + // at the same Location with a copyReg. + + bool sanityCheck = assignedInterval->isLocalVar; + // For upper vector interval, make sure it was one of the save/restore only. + if (assignedInterval->IsUpperVector()) + { + sanityCheck |= (recentRefPosition->refType == RefTypeUpperVectorSave) || + (recentRefPosition->refType == RefTypeUpperVectorRestore); + } + + assert(sanityCheck); + } + if (isAssignedReg) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + assert(!isRegAvailable(reg, assignedInterval->registerType)); + assert((recentRefPosition == nullptr) || + (spillCost[reg] == getSpillWeight(physRegRecord))); + } + else + { + assert((nextIntervalRef[reg] == MaxLocation) || + isRegBusy(reg, assignedInterval->registerType)); + } + } + else + { + if ((assignedInterval->physReg == reg) && !assignedInterval->isConstant) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + } + else + { + assert(nextIntervalRef[reg] == MaxLocation); + assert(isRegAvailable(reg, assignedInterval->registerType)); + assert(spillCost[reg] == 0); + } + } + } + } + else + { + // Available registers should not hold constants + assert(isRegAvailable(reg, physRegRecord->registerType)); + assert(!isRegConstant(reg, physRegRecord->registerType) || spillAlways()); + assert(nextIntervalRef[reg] == MaxLocation); + assert(spillCost[reg] == 0); + } + LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); + assert(nextFixedRef[reg] == thisNextFixedRef); +#ifdef TARGET_ARM + // If this is occupied by a double interval, skip the corresponding float reg. + if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) + { + reg = REG_NEXT(reg); + } +#endif + } +#endif // DEBUG + } + } + prevLocation = currentLocation; +#ifdef TARGET_ARM64 + +#ifdef DEBUG + if (hasConsecutiveRegister) + { + if (currentRefPosition.needsConsecutive) + { + // track all the refpositions around the location that is also + // allocating consecutive registers. + consecutiveRegistersLocation = currentLocation; + } + else if (consecutiveRegistersLocation < currentLocation) + { + consecutiveRegistersLocation = MinLocation; + } + } +#endif // DEBUG +#endif // TARGET_ARM64 + + // get previous refposition, then current refpos is the new previous + if (currentReferent != nullptr) + { + previousRefPosition = currentReferent->recentRefPosition; + currentReferent->recentRefPosition = ¤tRefPosition; + } + else + { + assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs)); + } + +#ifdef DEBUG + activeRefPosition = ¤tRefPosition; + + // For the purposes of register resolution, we handle the DummyDefs before + // the block boundary - so the RefTypeBB is after all the DummyDefs. + // However, for the purposes of allocation, we want to handle the block + // boundary first, so that we can free any registers occupied by lclVars + // that aren't live in the next block and make them available for the + // DummyDefs. + + // If we've already handled the BlockEnd, but now we're seeing the RefTypeBB, + // dump it now. + if ((refType == RefTypeBB) && handledBlockEnd) + { + dumpNewBlock(currentBlock, currentRefPosition.nodeLocation); + } +#endif // DEBUG + + if (!handledBlockEnd && refType == RefTypeBB) + { + // Free any delayed regs (now in regsToFree) before processing the block boundary + freeRegisters(regsToFree); + regsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; + regsInUseNextLocation = RBM_NONE; + handledBlockEnd = true; + curBBStartLocation = currentRefPosition.nodeLocation; + if (currentBlock == nullptr) + { + currentBlock = startBlockSequence(); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, compiler->fgFirstBB)); + } + else + { + processBlockEndAllocation(currentBlock); + currentBlock = moveToNextBlock(); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock)); + } + } + + if (refType == RefTypeBB) + { + handledBlockEnd = false; + continue; + } + + if (refType == RefTypeKillGCRefs) + { + spillGCRefs(¤tRefPosition); + continue; + } + + if (currentRefPosition.isPhysRegRef) + { + RegRecord* regRecord = currentRefPosition.getReg(); + Interval* assignedInterval = regRecord->assignedInterval; + + updateNextFixedRef(regRecord, currentRefPosition.nextRefPosition); + + // If this is a FixedReg, disassociate any inactive constant interval from this register. + // Otherwise, do nothing. + if (refType == RefTypeFixedReg) + { + if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) + { + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + regRecord->assignedInterval = nullptr; + spillCost[regRecord->regNum] = 0; + +#ifdef TARGET_ARM + // Update overlapping floating point register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) + { + RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); + assert(otherRegRecord->assignedInterval == assignedInterval); + otherRegRecord->assignedInterval = nullptr; + spillCost[otherRegRecord->regNum] = 0; + } +#endif // TARGET_ARM + } + regsInUseThisLocation |= currentRefPosition.registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition.assignedReg())); + } + else + { + assert(refType == RefTypeKill); + if (assignedInterval != nullptr) + { + unassignPhysReg(regRecord, assignedInterval->recentRefPosition); + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + makeRegAvailable(regRecord->regNum, assignedInterval->registerType); + } + clearRegBusyUntilKill(regRecord->regNum); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum)); + } + continue; + } + + regNumber assignedRegister = REG_NA; + + assert(currentRefPosition.isIntervalRef()); + currentInterval = currentRefPosition.getInterval(); + assert(currentInterval != nullptr); + assert(!currentInterval->isLocalVar); + assignedRegister = currentInterval->physReg; + + // Identify the special cases where we decide up-front not to allocate + bool allocate = true; + bool didDump = false; + +#ifdef FEATURE_SIMD +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (refType == RefTypeUpperVectorSave) + { + // Note that this case looks a lot like the case below, but in this case we need to spill + // at the previous RefPosition. + // We may want to consider allocating two callee-save registers for this case, but it happens rarely + // enough that it may not warrant the additional complexity. + if (assignedRegister != REG_NA) + { + RegRecord* regRecord = getRegisterRecord(assignedRegister); + Interval* assignedInterval = regRecord->assignedInterval; + unassignPhysReg(regRecord, currentInterval->firstRefPosition); + if (assignedInterval->isConstant) + { + clearConstantReg(assignedRegister, assignedInterval->registerType); + } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + } + currentRefPosition.registerAssignment = RBM_NONE; + continue; + } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE +#endif // FEATURE_SIMD + + if (assignedRegister == REG_NA && RefTypeIsUse(refType)) + { + currentRefPosition.reload = true; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); + } + + regMaskTP assignedRegBit = RBM_NONE; + bool isInRegister = false; + if (assignedRegister != REG_NA) + { + isInRegister = true; + assignedRegBit = genRegMask(assignedRegister); + if (!currentInterval->isActive) + { + assert(!RefTypeIsUse(refType)); + + currentInterval->isActive = true; + setRegInUse(assignedRegister, currentInterval->registerType); + updateSpillCost(assignedRegister, currentInterval); + updateNextIntervalRef(assignedRegister, currentInterval); + } + assert(currentInterval->assignedReg != nullptr && + currentInterval->assignedReg->regNum == assignedRegister && + currentInterval->assignedReg->assignedInterval == currentInterval); + } + + if (previousRefPosition != nullptr) + { + assert(previousRefPosition->nextRefPosition == ¤tRefPosition); + assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment || + currentRefPosition.outOfOrder || previousRefPosition->copyReg); + } + + if (assignedRegister != REG_NA) + { + RegRecord* physRegRecord = getRegisterRecord(assignedRegister); + assert((assignedRegBit == currentRefPosition.registerAssignment) || + (physRegRecord->assignedInterval == currentInterval) || + !isRegInUse(assignedRegister, currentInterval->registerType)); + + if (conflictingFixedRegReference(assignedRegister, ¤tRefPosition)) + { + // We may have already reassigned the register to the conflicting reference. + // If not, we need to unassign this interval. + if (physRegRecord->assignedInterval == currentInterval) + { + unassignPhysRegNoSpill(physRegRecord); + clearConstantReg(assignedRegister, currentInterval->registerType); + } + currentRefPosition.moveReg = true; + assignedRegister = REG_NA; + currentRefPosition.registerAssignment &= ~assignedRegBit; + setIntervalAsSplit(currentInterval); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); + } + else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) + { +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) + { + // For consecutive registers, if the first RefPosition is already assigned to a register, + // check if consecutive registers are free so they can be assigned to the subsequent + // RefPositions. + if (canAssignNextConsecutiveRegisters(¤tRefPosition, assignedRegister)) + { + // Current assignedRegister satisfies the consecutive registers requirements + currentRefPosition.registerAssignment = assignedRegBit; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); + + assignConsecutiveRegisters(¤tRefPosition, assignedRegister); + } + else + { + // It doesn't satisfy, so do a copyReg for the first RefPosition to such a register, so + // it would be possible to allocate consecutive registers to the subsequent RefPositions. + regNumber copyReg = assignCopyReg(¤tRefPosition); + assignConsecutiveRegisters(¤tRefPosition, copyReg); + + if (copyReg != assignedRegister) + { + lastAllocatedRefPosition = ¤tRefPosition; + regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + + if ((consecutiveRegsInUseThisLocation & assignedRegMask) != RBM_NONE) + { + // If assigned register is one of the consecutive register we are about to assign + // to the subsequent RefPositions, do not mark it busy otherwise when we allocate + // for that particular subsequent RefPosition, we will not find any candidates + // available. Not marking it busy should be fine because we have already set the + // register assignments for all the consecutive refpositions. + assignedRegMask = RBM_NONE; + } + + // For consecutive register, although it shouldn't matter what the assigned register was, + // because we have just assigned it `copyReg` and that's the one in-use, and not the + // one that was assigned previously. However, in situation where an upper-vector restore + // happened to be restored in assignedReg, we would need assignedReg to stay alive because + // we will copy the entire vector value from it to the `copyReg`. + + updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree, + &delayRegsToFree DEBUG_ARG(currentInterval) + DEBUG_ARG(assignedRegister)); + if (!currentRefPosition.lastUse) + { + copyRegsToFree |= copyRegMask; + } + + // If this is a tree temp (non-localVar) interval, we will need an explicit move. + // Note: In theory a moveReg should cause the Interval to now have the new reg as its + // assigned register. However, that's not currently how this works. + // If we ever actually move lclVar intervals instead of copying, this will need to change. + if (!currentInterval->isLocalVar) + { + currentRefPosition.moveReg = true; + currentRefPosition.copyReg = false; + } + clearNextIntervalRef(copyReg, currentInterval->registerType); + clearSpillCost(copyReg, currentInterval->registerType); + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); + } + else + { + // We first noticed that with assignedRegister, we were not getting consecutive registers + // assigned, so we decide to perform copyReg. However, copyReg assigned same register + // because there were no other free registers that would satisfy the consecutive registers + // requirements. In such case, just revert the copyReg state update. + currentRefPosition.copyReg = false; + + // Current assignedRegister satisfies the consecutive registers requirements + currentRefPosition.registerAssignment = assignedRegBit; + } + + continue; + } + } + else +#endif + { + currentRefPosition.registerAssignment = assignedRegBit; + if (!currentInterval->isActive) + { + currentRefPosition.reload = true; + } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); + } + } + else + { + // It's already in a register, but not one we need. + if (!RefTypeIsDef(currentRefPosition.refType)) + { + regNumber copyReg; +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive && + currentRefPosition.refType == RefTypeUse) + { + copyReg = assignCopyReg(¤tRefPosition); + } + else +#endif + { + copyReg = assignCopyReg(¤tRefPosition); + } + + lastAllocatedRefPosition = ¤tRefPosition; + regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) + { + if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) + { + // If the first RefPosition was not assigned to the register that we wanted, we added + // a copyReg for it. Allocate subsequent RefPositions with the consecutive + // registers. + assignConsecutiveRegisters(¤tRefPosition, copyReg); + } + + if ((consecutiveRegsInUseThisLocation & assignedRegMask) != RBM_NONE) + { + // If assigned register is one of the consecutive register we are about to assign + // to the subsequent RefPositions, do not mark it busy otherwise when we allocate + // for that particular subsequent RefPosition, we will not find any candidates + // available. Not marking it busy should be fine because we have already set the + // register assignments for all the consecutive refpositions. + assignedRegMask = RBM_NONE; + } + } +#endif + // For consecutive register, although it shouldn't matter what the assigned register was, + // because we have just assigned it `copyReg` and that's the one in-use, and not the + // one that was assigned previously. However, in situation where an upper-vector restore + // happened to be restored in assignedReg, we would need assignedReg to stay alive because + // we will copy the entire vector value from it to the `copyReg`. + updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree, + &delayRegsToFree DEBUG_ARG(currentInterval) DEBUG_ARG(assignedRegister)); + if (!currentRefPosition.lastUse) + { + copyRegsToFree |= copyRegMask; + } + + // For tree temp (non-localVar) interval, we will need an explicit move. + currentRefPosition.moveReg = true; + currentRefPosition.copyReg = false; + clearNextIntervalRef(copyReg, currentInterval->registerType); + clearSpillCost(copyReg, currentInterval->registerType); + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); + continue; + } + else + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); + regsToFree |= getRegMask(assignedRegister, currentInterval->registerType); + // We want a new register, but we don't want this to be considered a spill. + assignedRegister = REG_NA; + if (physRegRecord->assignedInterval == currentInterval) + { + unassignPhysRegNoSpill(physRegRecord); + } + } + } + } + +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) + { + // For consecutive register, we would like to assign a register (if not already assigned) + // to the 1st refPosition and the subsequent refPositions will just get the consecutive register. + if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) + { + if (assignedRegister != REG_NA) + { + // For the 1st refPosition, if it already has a register assigned, then just assign + // subsequent registers to the remaining position and skip the allocation for the + // 1st refPosition altogether. + + if (!canAssignNextConsecutiveRegisters(¤tRefPosition, assignedRegister)) + { + // The consecutive registers are busy. Force to allocate even for the 1st + // refPosition + assignedRegister = REG_NA; + RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); + currentRefPosition.registerAssignment = allRegs(currentInterval->registerType); + } + } + } + else if (currentRefPosition.refType == RefTypeUse) + { + // remaining refPosition of the series... + if (assignedRegBit == currentRefPosition.registerAssignment) + { + // For the subsequent position, if they already have the subsequent register assigned, then + // no need to find register to assign. + allocate = false; + } + else + { + // If the subsequent refPosition is not assigned to the consecutive register, then reassign the + // right consecutive register. + assignedRegister = REG_NA; + } + } + } +#endif // TARGET_ARM64 + + if (assignedRegister == REG_NA) + { + if (currentRefPosition.RegOptional()) + { + // We can avoid allocating a register if it is a last use requiring a reload. + if (currentRefPosition.lastUse && currentRefPosition.reload) + { + allocate = false; + } + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(TARGET_XARCH) + // We can also avoid allocating a register (in fact we don't want to) if we have + // an UpperVectorRestore on xarch where the value is on the stack. + if ((currentRefPosition.refType == RefTypeUpperVectorRestore) && (currentInterval->physReg == REG_NA)) + { + assert(currentRefPosition.regOptional); + allocate = false; + } +#endif + +#ifdef DEBUG + // Under stress mode, don't allocate registers to RegOptional RefPositions. + if (allocate && regOptionalNoAlloc()) + { + allocate = false; + } +#endif + } + + RegisterScore registerScore = NONE; + if (allocate) + { + // Allocate a register, if we must, or if it is profitable to do so. + // If we have a fixed reg requirement, and the interval is inactive in another register, + // unassign that register. + if (currentRefPosition.isFixedRegRef && !currentInterval->isActive && + (currentInterval->assignedReg != nullptr) && + (currentInterval->assignedReg->assignedInterval == currentInterval) && + (genRegMask(currentInterval->assignedReg->regNum) != currentRefPosition.registerAssignment)) + { + unassignPhysReg(currentInterval->assignedReg, nullptr); + } + +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) + { + assignedRegister = + allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); + if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) + { + assignConsecutiveRegisters(¤tRefPosition, assignedRegister); + } + } + else +#endif // TARGET_ARM64 + { + assignedRegister = allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); + } + } + + // If no register was found, this RefPosition must not require a register. + if (assignedRegister == REG_NA) + { + assert(currentRefPosition.RegOptional()); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + currentRefPosition.registerAssignment = RBM_NONE; + currentRefPosition.reload = false; + currentInterval->isActive = false; + setIntervalAsSpilled(currentInterval); + } +#ifdef DEBUG + else + { + if (VERBOSE) + { + if (currentInterval->isConstant && (currentRefPosition.treeNode != nullptr) && + currentRefPosition.treeNode->IsReuseRegVal()) + { + dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, currentInterval, assignedRegister, currentBlock, + registerScore); + } + else + { + dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister, currentBlock, + registerScore); + } + } + } +#endif // DEBUG + + // If we allocated a register, and this is a use of a spilled value, + // it should have been marked for reload above. + if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister) + { + assert(currentRefPosition.reload); + } + } + + // If we allocated a register, record it + if (assignedRegister != REG_NA) + { + assignedRegBit = genRegMask(assignedRegister); + regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType); + regsInUseThisLocation |= regMask; + if (currentRefPosition.delayRegFree) + { + regsInUseNextLocation |= regMask; + } + currentRefPosition.registerAssignment = assignedRegBit; + + currentInterval->physReg = assignedRegister; + regsToFree &= ~regMask; // we'll set it again later if it's dead + + // If this interval is dead, free the register. + // The interval could be dead if this is a user variable, or if the + // node is being evaluated for side effects, or a call whose result + // is not used, etc. + // If this is an UpperVector we'll neither free it nor preference it + // (it will be freed when it is used). + bool unassign = false; + if (currentRefPosition.lastUse || (currentRefPosition.nextRefPosition == nullptr)) + { + assert(currentRefPosition.isIntervalRef()); + // If this isn't a final use, we'll mark the register as available, but keep the association. + if ((refType != RefTypeExpUse) && (currentRefPosition.nextRefPosition == nullptr)) + { + unassign = true; + } + else + { + if (currentRefPosition.delayRegFree) + { + delayRegsToMakeInactive |= regMask; + } + else + { + regsToMakeInactive |= regMask; + } + // TODO-Cleanup: this makes things consistent with previous, and will enable preferences + // to be propagated, but it seems less than ideal. + currentInterval->isActive = false; + } + // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. + // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we + // don't know yet whether the register will be retained. + if (currentInterval->relatedInterval != nullptr) + { + currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); } } - } - else - { - Interval* assignedInterval = physRegRecord->assignedInterval; - - if (assignedInterval != nullptr && assignedInterval->registerType == TYP_DOUBLE) - { - // Skip next float register, because we already addressed a double register - assert(genIsValidDoubleReg(reg)); - reg = REG_NEXT(reg); - } - } - } -#else - regMaskTP deadCandidates = ~liveRegs; - - // Only focus on actual registers present - deadCandidates &= actualRegistersMask; - - while (deadCandidates != RBM_NONE) - { - regNumber reg = genFirstRegNumFromMaskAndToggle(deadCandidates); - RegRecord* physRegRecord = getRegisterRecord(reg); - - makeRegAvailable(reg, physRegRecord->registerType); - Interval* assignedInterval = physRegRecord->assignedInterval; - - if (assignedInterval != nullptr) - { - assert(assignedInterval->isLocalVar || assignedInterval->isConstant || assignedInterval->IsUpperVector()); - if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord) + if (unassign) { - assignedInterval->isActive = false; - if (assignedInterval->getNextRefPosition() == nullptr) + if (currentRefPosition.delayRegFree) { - unassignPhysReg(physRegRecord, nullptr); + delayRegsToFree |= regMask; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); } - if (!assignedInterval->IsUpperVector()) + else { - inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK; + regsToFree |= regMask; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); } } - else + if (!unassign) { - // This interval may still be active, but was in another register in an - // intervening block. - clearAssignedInterval(physRegRecord ARM_ARG(assignedInterval->registerType)); + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); } } + lastAllocatedRefPosition = ¤tRefPosition; } -#endif // TARGET_ARM -} -//------------------------------------------------------------------------ -// processBlockEndLocations: Record the variables occupying registers after completing the current block. -// -// Arguments: -// currentBlock - the block we have just completed. -// -// Return Value: -// None -// -// Notes: -// This must be called both during the allocation and resolution (write-back) phases. -// This is because we need to have the outVarToRegMap locations in order to set the locations -// at successor blocks during allocation time, but if lclVars are spilled after a block has been -// completed, we need to record the REG_STK location for those variables at resolution time. -// -void LinearScan::processBlockEndLocations(BasicBlock* currentBlock) -{ - assert(currentBlock != nullptr && currentBlock->bbNum == curBBNum); - VarToRegMap outVarToRegMap = getOutVarToRegMap(curBBNum); + // Free registers to clear associated intervals for resolution phase + CLANG_FORMAT_COMMENT_ANCHOR; - VarSetOps::AssignNoCopy(compiler, currentLiveVars, - VarSetOps::Intersection(compiler, registerCandidateVars, currentBlock->bbLiveOut)); #ifdef DEBUG if (getLsraExtendLifeTimes()) { - VarSetOps::Assign(compiler, currentLiveVars, registerCandidateVars); - } -#endif // DEBUG - VarSetOps::Iter iter(compiler, currentLiveVars); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - Interval* interval = getIntervalForLocalVar(varIndex); - if (interval->isActive) - { - assert(interval->physReg != REG_NA && interval->physReg != REG_STK); - setVarReg(outVarToRegMap, varIndex, interval->physReg); - } - else + // If we have extended lifetimes, we need to make sure all the registers are freed. + for (size_t regNumIndex = 0; regNumIndex <= REG_FP_LAST; regNumIndex++) { - outVarToRegMap[varIndex] = REG_STK; + RegRecord& regRecord = physRegs[regNumIndex]; + Interval* interval = regRecord.assignedInterval; + if (interval != nullptr) + { + interval->isActive = false; + unassignPhysReg(®Record, nullptr); + } } -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - // Ensure that we have no partially-spilled large vector locals. - assert(!Compiler::varTypeNeedsPartialCalleeSave(interval->registerType) || !interval->isPartiallySpilled); -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB)); -} - -#ifdef DEBUG -void LinearScan::dumpRefPositions(const char* str) -{ - printf("------------\n"); - printf("REFPOSITIONS %s: \n", str); - printf("------------\n"); - for (RefPosition& refPos : refPositions) - { - refPos.dump(this); } -} + else #endif // DEBUG - -//------------------------------------------------------------------------ -// LinearScan::makeRegisterInactive: Make the interval currently assigned to -// a register inactive. -// -// Arguments: -// physRegRecord - the RegRecord for the register -// -// Return Value: -// None. -// -// Notes: -// It may be that the RegRecord has already been freed, e.g. due to a kill, -// or it may be that the register was a copyReg, so is not the assigned register -// of the Interval currently occupying the register, in which case this method has no effect. -// -void LinearScan::makeRegisterInactive(RegRecord* physRegRecord) -{ - Interval* assignedInterval = physRegRecord->assignedInterval; - // It may have already been freed by a "Kill" - if ((assignedInterval != nullptr) && (assignedInterval->physReg == physRegRecord->regNum)) { - assignedInterval->isActive = false; - if (assignedInterval->isConstant) - { - clearNextIntervalRef(physRegRecord->regNum, assignedInterval->registerType); - } + freeRegisters(regsToFree | delayRegsToFree); } -} - -//------------------------------------------------------------------------ -// LinearScan::freeRegister: Make a register available for use -// -// Arguments: -// physRegRecord - the RegRecord for the register to be freed. -// -// Return Value: -// None. -// -// Assumptions: -// None. -// It may be that the RegRecord has already been freed, e.g. due to a kill, -// in which case this method has no effect. -// -// Notes: -// If there is currently an Interval assigned to this register, and it has -// more references (i.e. this is a local last-use, but more uses and/or -// defs remain), it will remain assigned to the physRegRecord. However, since -// it is marked inactive, the register will be available, albeit less desirable -// to allocate. -// -void LinearScan::freeRegister(RegRecord* physRegRecord) -{ - Interval* assignedInterval = physRegRecord->assignedInterval; - makeRegAvailable(physRegRecord->regNum, physRegRecord->registerType); - clearSpillCost(physRegRecord->regNum, physRegRecord->registerType); - makeRegisterInactive(physRegRecord); - if (assignedInterval != nullptr) +#ifdef DEBUG + if (VERBOSE) { - // TODO: Under the following conditions we should be just putting it in regsToMakeInactive - // not regsToFree. - // - // We don't unassign in the following conditions: - // - If this is a constant node, that we may encounter again, OR - // - If its recent RefPosition is not a last-use and its next RefPosition is non-null. - // - If there are no more RefPositions, or the next - // one is a def. Note that the latter condition doesn't actually ensure that - // there aren't subsequent uses that could be reached by a value in the assigned - // register, but is merely a heuristic to avoid tying up the register (or using - // it when it's non-optimal). A better alternative would be to use SSA, so that - // we wouldn't unnecessarily link separate live ranges to the same register. - // - RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); - if (!assignedInterval->isConstant && (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))) - { -#ifdef TARGET_ARM - assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum)); -#endif // TARGET_ARM - unassignPhysReg(physRegRecord, nullptr); - } - } -} + // Dump the RegRecords after the last RefPosition is handled. + dumpRegRecords(); + printf("\n"); -//------------------------------------------------------------------------ -// LinearScan::freeRegisters: Free the registers in 'regsToFree' -// -// Arguments: -// regsToFree - the mask of registers to free -// -void LinearScan::freeRegisters(regMaskTP regsToFree) -{ - if (regsToFree == RBM_NONE) - { - return; - } + dumpRefPositions("AFTER ALLOCATION"); + dumpVarRefPositions("AFTER ALLOCATION"); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS)); - makeRegsAvailable(regsToFree); - while (regsToFree != RBM_NONE) - { - regNumber nextReg = genFirstRegNumFromMaskAndToggle(regsToFree); + // Dump the intervals that remain active + printf("Active intervals at end of allocation:\n"); - RegRecord* regRecord = getRegisterRecord(nextReg); -#ifdef TARGET_ARM - if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE)) + // We COULD just reuse the intervalIter from above, but ArrayListIterator doesn't + // provide a Reset function (!) - we'll probably replace this so don't bother + // adding it + + for (Interval& interval : intervals) { - assert(genIsValidDoubleReg(nextReg)); - regsToFree ^= genRegMask(regNumber(nextReg + 1)); + if (interval.isActive) + { + printf("Active "); + interval.dump(this->compiler); + } } -#endif - freeRegister(regRecord); + + printf("\n"); } +#endif // DEBUG } //------------------------------------------------------------------------ @@ -4806,13 +5756,13 @@ void LinearScan::allocateRegisters() resetRegState(); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); - unsigned largeVectorVarIndex = 0; - while (largeVectorVarsIter.NextElem(&largeVectorVarIndex)) - { - Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex); - lclVarInterval->isPartiallySpilled = false; - } + VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); + unsigned largeVectorVarIndex = 0; + while (largeVectorVarsIter.NextElem(&largeVectorVarIndex)) + { + Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex); + lclVarInterval->isPartiallySpilled = false; + } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) @@ -11566,6 +12516,11 @@ LinearScan::RegisterSelection::RegisterSelection(LinearScan* linearScan) if (ordering == nullptr) { ordering = W("ABCDEFGHIJKLMNOPQ"); + + if (!linearScan->enregisterLocalVars) + { + ordering = W("MQQQQQQQQQQQQQQQQ"); + } } for (int orderId = 0; orderId < REGSELECT_HEURISTIC_COUNT; orderId++) @@ -12774,7 +13729,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #define REG_SEL_DEF(stat, value, shortname, orderSeqId) #define BUSY_REG_SEL_DEF(stat, value, shortname, orderSeqId) \ try_##stat(); \ - IF_FOUND_GOTO_DONE + IF_FOUND_GOTO_DONE #include "lsra_score.h" #endif // DEBUG diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index e10914c8a32bc..d7989bba1bb92 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -641,8 +641,13 @@ class LinearScan : public LinearScanInterface #ifdef TARGET_ARM64 template #endif - void allocateRegisters(); + void allocateRegistersForMinOpt(); + // This is where the actual assignment is done +#ifdef TARGET_ARM64 + template +#endif + void allocateRegisters(); // This is the resolution phase, where cross-block mismatches are fixed up template void resolveRegisters(); From 8ce699bbe7db33bbd88af59ff3166ef21c3c82a7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 1 Jan 2024 10:36:38 -0800 Subject: [PATCH 13/33] jit format --- src/coreclr/jit/lsra.cpp | 38 +++++++++++++++++++------------------- src/coreclr/jit/lsra.h | 6 +++--- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5b199c6bc9636..31efea933e52e 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2961,8 +2961,8 @@ template regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - regNumber foundReg; - regMaskTP foundRegBit; + regNumber foundReg; + regMaskTP foundRegBit; RegRecord* availablePhysRegRecord; if (enregisterLocalVars || needsConsecutiveRegisters) { @@ -2973,9 +2973,9 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, return REG_NA; } - foundReg = genRegNumFromMask(foundRegBit); + foundReg = genRegNumFromMask(foundRegBit); availablePhysRegRecord = getRegisterRecord(foundReg); - Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) { @@ -3033,8 +3033,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, else { // For minopts, just unassign `foundReg` from existing interval - foundRegBit = - regSelector->selectMinOpts(currentInterval, refPosition DEBUG_ARG(registerScore)); + foundRegBit = regSelector->selectMinOpts(currentInterval, refPosition DEBUG_ARG(registerScore)); foundReg = genRegNumFromMask(foundRegBit); if (foundRegBit == RBM_NONE) @@ -3043,7 +3042,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, } availablePhysRegRecord = getRegisterRecord(foundReg); - Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) { @@ -4244,9 +4243,9 @@ void LinearScan::resetAllRegistersState() for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) { - RegRecord* physRegRecord = getRegisterRecord(reg); + RegRecord* physRegRecord = getRegisterRecord(reg); #ifdef DEBUG - Interval* assignedInterval = physRegRecord->assignedInterval; + Interval* assignedInterval = physRegRecord->assignedInterval; assert(assignedInterval == nullptr || assignedInterval->isConstant); #endif physRegRecord->assignedInterval = nullptr; @@ -4898,7 +4897,8 @@ void LinearScan::allocateRegistersForMinOpt() Interval* currentInterval = nullptr; Referenceable* currentReferent = nullptr; RefType refType = currentRefPosition.refType; - assert(!((refType == RefTypeDummyDef) || (refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || (refType == RefTypeExpUse))); + assert(!((refType == RefTypeDummyDef) || (refType == RefTypeParamDef) || (refType == RefTypeZeroInit) || + (refType == RefTypeExpUse))); currentReferent = currentRefPosition.referent; @@ -12551,10 +12551,10 @@ void LinearScan::RegisterSelection::resetMinOpts(Interval* interval, RefPosition currentInterval = interval; refPosition = refPos; - regType = linearScan->getRegisterType(currentInterval, refPosition); - candidates = refPosition->registerAssignment; - freeCandidates = RBM_NONE; - found = false; + regType = linearScan->getRegisterType(currentInterval, refPosition); + candidates = refPosition->registerAssignment; + freeCandidates = RBM_NONE; + found = false; } // ---------------------------------------------------------- @@ -13729,7 +13729,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #define REG_SEL_DEF(stat, value, shortname, orderSeqId) #define BUSY_REG_SEL_DEF(stat, value, shortname, orderSeqId) \ try_##stat(); \ - IF_FOUND_GOTO_DONE + IF_FOUND_GOTO_DONE #include "lsra_score.h" #endif // DEBUG @@ -13759,8 +13759,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, // Return Values: // Register bit selected (a single register) and REG_NA if no register was selected. // -regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) +regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { #ifdef DEBUG *registerScore = NONE; @@ -13827,7 +13827,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* assert(genMaxOneBit(refPosition->registerAssignment)); if (candidates == refPosition->registerAssignment) { - found = true; + found = true; foundRegBit = candidates; return candidates; } @@ -13921,7 +13921,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* #ifdef DEBUG #if TRACK_LSRA_STATS INTRACK_STATS_IF(found, linearScan->updateLsraStat(linearScan->getLsraStatFromScore(RegisterScore::REG_NUM), - refPosition->bbNum)); + refPosition->bbNum)); #endif // TRACK_LSRA_STATS *registerScore = RegisterScore::REG_NUM; #endif // DEBUG diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index d7989bba1bb92..e427ea20ecba0 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -643,7 +643,7 @@ class LinearScan : public LinearScanInterface #endif void allocateRegistersForMinOpt(); - // This is where the actual assignment is done +// This is where the actual assignment is done #ifdef TARGET_ARM64 template #endif @@ -1267,8 +1267,8 @@ class LinearScan : public LinearScanInterface FORCEINLINE regMaskTP select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - FORCEINLINE regMaskTP selectMinOpts(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + FORCEINLINE regMaskTP selectMinOpts(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); // If the register is from unassigned set such that it was not already // assigned to the current interval From 4ae62ed40e72b5edd33c81dd14829a3750d6c3a5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 2 Jan 2024 07:12:38 -0800 Subject: [PATCH 14/33] Update the assert for processBlockStartLocations() --- src/coreclr/jit/lsra.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 31efea933e52e..bd06de2017476 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4271,9 +4271,9 @@ void LinearScan::resetAllRegistersState() // void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) { - // If we have no register candidates we should only call this method during allocation. + // We should only call this method if we have register candidates. - assert(enregisterLocalVars || !allocationPassComplete); + assert(enregisterLocalVars); unsigned predBBNum = blockInfo[currentBlock->bbNum].predBBNum; VarToRegMap predVarToRegMap = getOutVarToRegMap(predBBNum); From 7f29c4e7048fcfb41ed9d009dfa93c73b7eb69d9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 3 Jan 2024 11:36:05 -0800 Subject: [PATCH 15/33] Fix bug by using freeCandidates --- src/coreclr/jit/lsra.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index bd06de2017476..6ff2367643947 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12551,10 +12551,9 @@ void LinearScan::RegisterSelection::resetMinOpts(Interval* interval, RefPosition currentInterval = interval; refPosition = refPos; - regType = linearScan->getRegisterType(currentInterval, refPosition); - candidates = refPosition->registerAssignment; - freeCandidates = RBM_NONE; - found = false; + regType = linearScan->getRegisterType(currentInterval, refPosition); + candidates = refPosition->registerAssignment; + found = false; } // ---------------------------------------------------------- @@ -13891,7 +13890,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInter return RBM_NONE; } - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + candidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); try_REG_ORDER(); From 755b9aabc24fdf63c9ac5c7cd55fd6ac6ea83d6b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 00:03:33 -0800 Subject: [PATCH 16/33] Set candidates only if freeCandidates are NONE --- src/coreclr/jit/lsra.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 6ff2367643947..d32e9465f5ccd 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -13890,9 +13890,14 @@ regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInter return RBM_NONE; } - candidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + + if (freeCandidates != RBM_NONE) + { + candidates = freeCandidates; try_REG_ORDER(); + } #ifdef DEBUG #if TRACK_LSRA_STATS From a0222da3d29d1bf750733630412fdafae34eba7c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 00:08:30 -0800 Subject: [PATCH 17/33] Remove unnecessary setting of foundRegBit --- src/coreclr/jit/lsra.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d32e9465f5ccd..b30249bdfc7d6 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3032,20 +3032,19 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, } else { - // For minopts, just unassign `foundReg` from existing interval foundRegBit = regSelector->selectMinOpts(currentInterval, refPosition DEBUG_ARG(registerScore)); - foundReg = genRegNumFromMask(foundRegBit); - if (foundRegBit == RBM_NONE) { return REG_NA; } + foundReg = genRegNumFromMask(foundRegBit); availablePhysRegRecord = getRegisterRecord(foundReg); Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) { + // For minopts, just unassign `foundReg` from existing interval unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); } } @@ -13916,7 +13915,6 @@ regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInter { // We won't spill if this refPosition is not an actual ref or is RegOptional currentInterval->assignedReg = nullptr; - foundRegBit = RBM_NONE; return RBM_NONE; } else @@ -13933,6 +13931,5 @@ regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInter } assert(found && isSingleRegister(candidates)); - foundRegBit = candidates; return candidates; } From 01b29b06e0883f5232348dd3cb13279f7fe69cae Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 00:09:24 -0800 Subject: [PATCH 18/33] Remove unnecessary checks for `freeCandidates == RBM_NONE` --- src/coreclr/jit/lsra.cpp | 54 +++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b30249bdfc7d6..0aea335dec0eb 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12645,10 +12645,16 @@ bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, void LinearScan::RegisterSelection::try_FREE() { assert(!found); +#ifdef DEBUG + // We try the "free" heuristics only if we have free candidates and this is + // guarded by a check in Release mode. + // In Debug mode, JitLsraOrdering can reorder the heuristics and we can come + // here in any order, hence we should check if it is RBM_NONE or not. if (freeCandidates == RBM_NONE) { return; } +#endif found = applySelection(FREE, freeCandidates); } @@ -12661,10 +12667,6 @@ void LinearScan::RegisterSelection::try_FREE() void LinearScan::RegisterSelection::try_CONST_AVAILABLE() { assert(!found); - if (freeCandidates == RBM_NONE) - { - return; - } if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { @@ -12684,10 +12686,13 @@ void LinearScan::RegisterSelection::try_CONST_AVAILABLE() void LinearScan::RegisterSelection::try_THIS_ASSIGNED() { assert(!found); +#ifdef DEBUG + // See comments in try_FREE if (freeCandidates == RBM_NONE) { return; } +#endif if (currentInterval->assignedReg != nullptr) { @@ -12717,6 +12722,11 @@ void LinearScan::RegisterSelection::try_OWN_PREFERENCE() assert(!found); #ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } calculateCoversSets(); #endif @@ -12731,6 +12741,11 @@ void LinearScan::RegisterSelection::try_COVERS_RELATED() assert(!found); #ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } calculateCoversSets(); #endif @@ -12743,6 +12758,13 @@ void LinearScan::RegisterSelection::try_COVERS_RELATED() void LinearScan::RegisterSelection::try_RELATED_PREFERENCE() { assert(!found); +#ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } +#endif found = applySelection(RELATED_PREFERENCE, relatedPreferences & freeCandidates); } @@ -12753,6 +12775,13 @@ void LinearScan::RegisterSelection::try_RELATED_PREFERENCE() void LinearScan::RegisterSelection::try_CALLER_CALLEE() { assert(!found); +#ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } +#endif found = applySelection(CALLER_CALLEE, callerCalleePrefs & freeCandidates); } @@ -12779,6 +12808,11 @@ void LinearScan::RegisterSelection::try_COVERS_FULL() assert(!found); #ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } calculateCoversSets(); #endif @@ -12798,11 +12832,6 @@ void LinearScan::RegisterSelection::try_BEST_FIT() { assert(!found); - if (freeCandidates == RBM_NONE) - { - return; - } - regMaskTP bestFitSet = RBM_NONE; // If the best score includes COVERS_FULL, pick the one that's killed soonest. // If none cover the full range, the BEST_FIT is the one that's killed later. @@ -12887,11 +12916,6 @@ void LinearScan::RegisterSelection::try_REG_ORDER() { assert(!found); - if (freeCandidates == RBM_NONE) - { - return; - } - // This will always result in a single candidate. That is, it is the tie-breaker // for free candidates, and doesn't make sense as anything other than the last // heuristic for free registers. @@ -13895,7 +13919,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInter { candidates = freeCandidates; - try_REG_ORDER(); + try_REG_ORDER(); } #ifdef DEBUG From f6d8aa7109d3b5a6d8c5aac810ff40161effe92c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 00:10:31 -0800 Subject: [PATCH 19/33] Remove unnecessary checks for `freeCandidates == RBM_NONE` --amend --- src/coreclr/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 0aea335dec0eb..9fc5aa777778f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3038,7 +3038,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, return REG_NA; } - foundReg = genRegNumFromMask(foundRegBit); + foundReg = genRegNumFromMask(foundRegBit); availablePhysRegRecord = getRegisterRecord(foundReg); Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && From 1cbd0b7fdba59582d6463b4d919ac24b7f1445c2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 00:47:02 -0800 Subject: [PATCH 20/33] wip --- src/coreclr/jit/lsra.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9fc5aa777778f..673c86449f4fd 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1402,14 +1402,7 @@ PhaseStatus LinearScan::doLinearScan() #ifdef TARGET_ARM64 if (compiler->info.compNeedsConsecutiveRegisters) { - if (enregisterLocalVars) - { - allocateRegisters(); - } - else - { - allocateRegistersForMinOpt(); - } + allocateRegisters(); } else #endif // TARGET_ARM64 From ead571c191fd3b63898bc6f5e148b2e0b2380989 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 16:16:09 -0800 Subject: [PATCH 21/33] add allocateRegForMinOpts() --- src/coreclr/jit/lsra.cpp | 57 +++++++++++++++++++++------------------- src/coreclr/jit/lsra.h | 1 + 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 673c86449f4fd..ab0dfa3e58cb2 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2916,6 +2916,33 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo return false; } +regNumber LinearScan::allocateRegForMinOpts(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) +{ + regNumber foundReg; + regMaskTP foundRegBit; + RegRecord* availablePhysRegRecord; + foundRegBit = regSelector->selectMinOpts(currentInterval, refPosition DEBUG_ARG(registerScore)); + if (foundRegBit == RBM_NONE) + { + return REG_NA; + } + + foundReg = genRegNumFromMask(foundRegBit); + availablePhysRegRecord = getRegisterRecord(foundReg); + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && + isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) + { + // For minopts, just unassign `foundReg` from existing interval + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + } + + assignPhysReg(availablePhysRegRecord, currentInterval); + refPosition->registerAssignment = foundRegBit; + return foundReg; +} + //------------------------------------------------------------------------ // allocateReg: Find a register that satisfies the requirements for refPosition, // taking into account the preferences for the given Interval, @@ -2954,20 +2981,15 @@ template regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - regNumber foundReg; - regMaskTP foundRegBit; - RegRecord* availablePhysRegRecord; - if (enregisterLocalVars || needsConsecutiveRegisters) - { - foundRegBit = + regMaskTP foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { return REG_NA; } - foundReg = genRegNumFromMask(foundRegBit); - availablePhysRegRecord = getRegisterRecord(foundReg); + regNumber foundReg = genRegNumFromMask(foundRegBit); + RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) @@ -3022,25 +3044,6 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, } } } - } - else - { - foundRegBit = regSelector->selectMinOpts(currentInterval, refPosition DEBUG_ARG(registerScore)); - if (foundRegBit == RBM_NONE) - { - return REG_NA; - } - - foundReg = genRegNumFromMask(foundRegBit); - availablePhysRegRecord = getRegisterRecord(foundReg); - Interval* assignedInterval = availablePhysRegRecord->assignedInterval; - if ((assignedInterval != currentInterval) && - isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) - { - // For minopts, just unassign `foundReg` from existing interval - unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); - } - } assignPhysReg(availablePhysRegRecord, currentInterval); refPosition->registerAssignment = foundRegBit; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index e427ea20ecba0..f0a3d8b8b7654 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1193,6 +1193,7 @@ class LinearScan : public LinearScanInterface template regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); template + regNumber allocateRegForMinOpts(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); regNumber assignCopyReg(RefPosition* refPosition); bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); From eaff6094d36b993652f718c0ed7cc24ebfab5158 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 16:17:40 -0800 Subject: [PATCH 22/33] Remove hasConsecutiveRegister template from allocateRegistersForMinOpt() --- src/coreclr/jit/lsra.cpp | 205 +-------------------------------------- src/coreclr/jit/lsra.h | 3 - 2 files changed, 2 insertions(+), 206 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index ab0dfa3e58cb2..5541237f97ba2 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4787,9 +4787,6 @@ void LinearScan::freeRegisters(regMaskTP regsToFree) // LinearScan::allocateRegistersForMinOpt: Perform the actual register allocation for MinOpts by iterating over // all of the previously constructed Intervals // -#ifdef TARGET_ARM64 -template -#endif void LinearScan::allocateRegistersForMinOpt() { JITDUMP("*************** In LinearScan::allocateRegistersForMinOpt()\n"); @@ -4932,12 +4929,6 @@ void LinearScan::allocateRegistersForMinOpt() copyRegsToFree = RBM_NONE; regsInUseThisLocation = regsInUseNextLocation; regsInUseNextLocation = RBM_NONE; -#ifdef TARGET_ARM64 - if (hasConsecutiveRegister) - { - consecutiveRegsInUseThisLocation = RBM_NONE; - } -#endif if ((regsToFree | delayRegsToFree) != RBM_NONE) { freeRegisters(regsToFree); @@ -5056,24 +5047,6 @@ void LinearScan::allocateRegistersForMinOpt() } } prevLocation = currentLocation; -#ifdef TARGET_ARM64 - -#ifdef DEBUG - if (hasConsecutiveRegister) - { - if (currentRefPosition.needsConsecutive) - { - // track all the refpositions around the location that is also - // allocating consecutive registers. - consecutiveRegistersLocation = currentLocation; - } - else if (consecutiveRegistersLocation < currentLocation) - { - consecutiveRegistersLocation = MinLocation; - } - } -#endif // DEBUG -#endif // TARGET_ARM64 // get previous refposition, then current refpos is the new previous if (currentReferent != nullptr) @@ -5278,89 +5251,6 @@ void LinearScan::allocateRegistersForMinOpt() } else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) { -#ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) - { - // For consecutive registers, if the first RefPosition is already assigned to a register, - // check if consecutive registers are free so they can be assigned to the subsequent - // RefPositions. - if (canAssignNextConsecutiveRegisters(¤tRefPosition, assignedRegister)) - { - // Current assignedRegister satisfies the consecutive registers requirements - currentRefPosition.registerAssignment = assignedRegBit; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); - - assignConsecutiveRegisters(¤tRefPosition, assignedRegister); - } - else - { - // It doesn't satisfy, so do a copyReg for the first RefPosition to such a register, so - // it would be possible to allocate consecutive registers to the subsequent RefPositions. - regNumber copyReg = assignCopyReg(¤tRefPosition); - assignConsecutiveRegisters(¤tRefPosition, copyReg); - - if (copyReg != assignedRegister) - { - lastAllocatedRefPosition = ¤tRefPosition; - regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); - regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); - - if ((consecutiveRegsInUseThisLocation & assignedRegMask) != RBM_NONE) - { - // If assigned register is one of the consecutive register we are about to assign - // to the subsequent RefPositions, do not mark it busy otherwise when we allocate - // for that particular subsequent RefPosition, we will not find any candidates - // available. Not marking it busy should be fine because we have already set the - // register assignments for all the consecutive refpositions. - assignedRegMask = RBM_NONE; - } - - // For consecutive register, although it shouldn't matter what the assigned register was, - // because we have just assigned it `copyReg` and that's the one in-use, and not the - // one that was assigned previously. However, in situation where an upper-vector restore - // happened to be restored in assignedReg, we would need assignedReg to stay alive because - // we will copy the entire vector value from it to the `copyReg`. - - updateRegsFreeBusyState(currentRefPosition, assignedRegMask | copyRegMask, ®sToFree, - &delayRegsToFree DEBUG_ARG(currentInterval) - DEBUG_ARG(assignedRegister)); - if (!currentRefPosition.lastUse) - { - copyRegsToFree |= copyRegMask; - } - - // If this is a tree temp (non-localVar) interval, we will need an explicit move. - // Note: In theory a moveReg should cause the Interval to now have the new reg as its - // assigned register. However, that's not currently how this works. - // If we ever actually move lclVar intervals instead of copying, this will need to change. - if (!currentInterval->isLocalVar) - { - currentRefPosition.moveReg = true; - currentRefPosition.copyReg = false; - } - clearNextIntervalRef(copyReg, currentInterval->registerType); - clearSpillCost(copyReg, currentInterval->registerType); - updateNextIntervalRef(assignedRegister, currentInterval); - updateSpillCost(assignedRegister, currentInterval); - } - else - { - // We first noticed that with assignedRegister, we were not getting consecutive registers - // assigned, so we decide to perform copyReg. However, copyReg assigned same register - // because there were no other free registers that would satisfy the consecutive registers - // requirements. In such case, just revert the copyReg state update. - currentRefPosition.copyReg = false; - - // Current assignedRegister satisfies the consecutive registers requirements - currentRefPosition.registerAssignment = assignedRegBit; - } - - continue; - } - } - else -#endif - { currentRefPosition.registerAssignment = assignedRegBit; if (!currentInterval->isActive) { @@ -5368,51 +5258,17 @@ void LinearScan::allocateRegistersForMinOpt() } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); } - } else { // It's already in a register, but not one we need. if (!RefTypeIsDef(currentRefPosition.refType)) { - regNumber copyReg; -#ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.needsConsecutive && - currentRefPosition.refType == RefTypeUse) - { - copyReg = assignCopyReg(¤tRefPosition); - } - else -#endif - { - copyReg = assignCopyReg(¤tRefPosition); - } + regNumber copyReg = assignCopyReg(¤tRefPosition); lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); -#ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) - { - if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) - { - // If the first RefPosition was not assigned to the register that we wanted, we added - // a copyReg for it. Allocate subsequent RefPositions with the consecutive - // registers. - assignConsecutiveRegisters(¤tRefPosition, copyReg); - } - - if ((consecutiveRegsInUseThisLocation & assignedRegMask) != RBM_NONE) - { - // If assigned register is one of the consecutive register we are about to assign - // to the subsequent RefPositions, do not mark it busy otherwise when we allocate - // for that particular subsequent RefPosition, we will not find any candidates - // available. Not marking it busy should be fine because we have already set the - // register assignments for all the consecutive refpositions. - assignedRegMask = RBM_NONE; - } - } -#endif // For consecutive register, although it shouldn't matter what the assigned register was, // because we have just assigned it `copyReg` and that's the one in-use, and not the // one that was assigned previously. However, in situation where an upper-vector restore @@ -5448,48 +5304,6 @@ void LinearScan::allocateRegistersForMinOpt() } } -#ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) - { - // For consecutive register, we would like to assign a register (if not already assigned) - // to the 1st refPosition and the subsequent refPositions will just get the consecutive register. - if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) - { - if (assignedRegister != REG_NA) - { - // For the 1st refPosition, if it already has a register assigned, then just assign - // subsequent registers to the remaining position and skip the allocation for the - // 1st refPosition altogether. - - if (!canAssignNextConsecutiveRegisters(¤tRefPosition, assignedRegister)) - { - // The consecutive registers are busy. Force to allocate even for the 1st - // refPosition - assignedRegister = REG_NA; - RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); - currentRefPosition.registerAssignment = allRegs(currentInterval->registerType); - } - } - } - else if (currentRefPosition.refType == RefTypeUse) - { - // remaining refPosition of the series... - if (assignedRegBit == currentRefPosition.registerAssignment) - { - // For the subsequent position, if they already have the subsequent register assigned, then - // no need to find register to assign. - allocate = false; - } - else - { - // If the subsequent refPosition is not assigned to the consecutive register, then reassign the - // right consecutive register. - assignedRegister = REG_NA; - } - } - } -#endif // TARGET_ARM64 - if (assignedRegister == REG_NA) { if (currentRefPosition.RegOptional()) @@ -5532,22 +5346,7 @@ void LinearScan::allocateRegistersForMinOpt() { unassignPhysReg(currentInterval->assignedReg, nullptr); } - -#ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) - { - assignedRegister = - allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); - if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) - { - assignConsecutiveRegisters(¤tRefPosition, assignedRegister); - } - } - else -#endif // TARGET_ARM64 - { - assignedRegister = allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); - } + assignedRegister = allocateRegForMinOpts(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); } // If no register was found, this RefPosition must not require a register. diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index f0a3d8b8b7654..d070f1c648296 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -638,9 +638,6 @@ class LinearScan : public LinearScanInterface void buildIntervals(); // This is where the actual assignment is done -#ifdef TARGET_ARM64 - template -#endif void allocateRegistersForMinOpt(); // This is where the actual assignment is done From bc15c745dfa79ad9667a2ab122b5d7b738d35eef Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 16:19:07 -0800 Subject: [PATCH 23/33] Add template parameter for assignCopyReg() and fix the uses --- src/coreclr/jit/lsra.cpp | 38 ++++++++++++++++++++++++++++++-------- src/coreclr/jit/lsra.h | 2 +- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5541237f97ba2..8cfa1babd39d9 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3258,9 +3258,12 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R // Prefer a free register that's got the earliest next use. // Otherwise, spill something with the farthest next use // -template +template regNumber LinearScan::assignCopyReg(RefPosition* refPosition) { + // We will never have hasConsecutiveRegister = false, but needsConsecutiveRegisters = true + static_assert_no_msg(hasConsecutiveRegister || !needsConsecutiveRegisters); + Interval* currentInterval = refPosition->getInterval(); assert(currentInterval != nullptr); assert(currentInterval->isActive); @@ -3281,8 +3284,20 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) refPosition->copyReg = true; RegisterScore registerScore = NONE; - regNumber allocatedReg = - allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); + + regNumber allocatedReg; + if (enregisterLocalVars +#ifdef TARGET_ARM64 + || hasConsecutiveRegister +#endif + ) + { + allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); + } + else + { + allocatedReg = allocateRegForMinOpts(currentInterval, refPosition DEBUG_ARG(®isterScore)); + } assert(allocatedReg != REG_NA); @@ -6322,7 +6337,7 @@ void LinearScan::allocateRegisters() { // It doesn't satisfy, so do a copyReg for the first RefPosition to such a register, so // it would be possible to allocate consecutive registers to the subsequent RefPositions. - regNumber copyReg = assignCopyReg(¤tRefPosition); + regNumber copyReg = assignCopyReg(¤tRefPosition); assignConsecutiveRegisters(¤tRefPosition, copyReg); if (copyReg != assignedRegister) @@ -6413,15 +6428,22 @@ void LinearScan::allocateRegisters() { regNumber copyReg; #ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.needsConsecutive && - currentRefPosition.refType == RefTypeUse) + + if (hasConsecutiveRegister) + { + if (currentRefPosition.needsConsecutive && currentRefPosition.refType == RefTypeUse) + { + copyReg = assignCopyReg(¤tRefPosition); + } + else { - copyReg = assignCopyReg(¤tRefPosition); + copyReg = assignCopyReg(¤tRefPosition); + } } else #endif { - copyReg = assignCopyReg(¤tRefPosition); + copyReg = assignCopyReg(¤tRefPosition); } lastAllocatedRefPosition = ¤tRefPosition; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index d070f1c648296..bee80df7fa0f6 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1189,8 +1189,8 @@ class LinearScan : public LinearScanInterface #endif template regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - template regNumber allocateRegForMinOpts(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + template regNumber assignCopyReg(RefPosition* refPosition); bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); From c06b01e656b1da33be3e3cc7b48beca4aed3d017 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 16:28:45 -0800 Subject: [PATCH 24/33] fix calls to processBlockEndAllocation() --- src/coreclr/jit/lsra.cpp | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 8cfa1babd39d9..8b8ae2187b378 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5911,7 +5911,24 @@ void LinearScan::allocateRegisters() } else { +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister) + { + if (enregisterLocalVars) + { + processBlockEndAllocation(currentBlock); + + } + else + { + processBlockEndAllocation(currentBlock); + } + } + else +#endif + { processBlockEndAllocation(currentBlock); + } currentBlock = moveToNextBlock(); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock)); } @@ -6764,7 +6781,7 @@ void LinearScan::allocateRegisters() // For the JIT32_GCENCODER, when lvaKeepAliveAndReportThis is true, we must either keep the "this" pointer // in the same register for the entire method, or keep it on the stack. Rather than imposing this constraint // as we allocate, we will force all refs to the stack if it is split or spilled. - if (enregisterLocalVars && compiler->lvaKeepAliveAndReportThis()) + if (compiler->lvaKeepAliveAndReportThis()) { LclVarDsc* thisVarDsc = compiler->lvaGetDesc(compiler->info.compThisArg); if (thisVarDsc->lvLRACandidate) From 053964621de8e04152e2590c5559b0a27f297728 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 16:35:15 -0800 Subject: [PATCH 25/33] fix bug about initializing vectorIter --- src/coreclr/jit/lsra.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 8b8ae2187b378..961d50c46f7ba 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5562,9 +5562,13 @@ void LinearScan::allocateRegisters() } } - resetRegState(); - #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE +#ifdef TARGET_ARM64 + // allocateRegisters() can be called even if enregisterLocalVars= false, but + // method needs consecutive registers. + if (enregisterLocalVars) +#endif + { VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); unsigned largeVectorVarIndex = 0; while (largeVectorVarsIter.NextElem(&largeVectorVarIndex)) @@ -5572,8 +5576,10 @@ void LinearScan::allocateRegisters() Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex); lclVarInterval->isPartiallySpilled = false; } + } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + resetRegState(); for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) { RegRecord* physRegRecord = getRegisterRecord(reg); From 90fd839fd4101b9d478108b11fa8af03686b57ae Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 16:36:20 -0800 Subject: [PATCH 26/33] Add assignCopyRegForMinOpts --- src/coreclr/jit/lsra.cpp | 214 ++++++++++++++++++++++----------------- src/coreclr/jit/lsra.h | 3 +- 2 files changed, 122 insertions(+), 95 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 961d50c46f7ba..1e9d4825ec882 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2982,68 +2982,68 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { regMaskTP foundRegBit = - regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); - if (foundRegBit == RBM_NONE) - { - return REG_NA; - } + regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); + if (foundRegBit == RBM_NONE) + { + return REG_NA; + } regNumber foundReg = genRegNumFromMask(foundRegBit); RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); - Interval* assignedInterval = availablePhysRegRecord->assignedInterval; - if ((assignedInterval != currentInterval) && - isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && + isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) + { + if (regSelector->isSpilling()) { - if (regSelector->isSpilling()) - { - // We're spilling. - CLANG_FORMAT_COMMENT_ANCHOR; + // We're spilling. + CLANG_FORMAT_COMMENT_ANCHOR; #ifdef TARGET_ARM - if (currentInterval->registerType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); - unassignDoublePhysReg(availablePhysRegRecord); - } - else if (assignedInterval->registerType == TYP_DOUBLE) - { - // Make sure we spill both halves of the double register. - assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); - unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); - } - else + if (currentInterval->registerType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); + unassignDoublePhysReg(availablePhysRegRecord); + } + else if (assignedInterval->registerType == TYP_DOUBLE) + { + // Make sure we spill both halves of the double register. + assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); + unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); + } + else #endif - { - unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); - } + { + unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); + } + } + else + { + // If we considered this "unassigned" because this interval's lifetime ends before + // the next ref, remember it. + // For historical reasons (due to former short-circuiting of this case), if we're reassigning + // the current interval to a previous assignment, we don't remember the previous interval. + // Note that we need to compute this condition before calling unassignPhysReg, which wil reset + // assignedInterval->physReg. + bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && + (assignedInterval->physReg == foundReg); + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) + { + assert(assignedInterval->isConstant); + refPosition->treeNode->SetReuseRegVal(); + } + else if (wasAssigned) + { + updatePreviousInterval(availablePhysRegRecord, + assignedInterval ARM_ARG(assignedInterval->registerType)); } else { - // If we considered this "unassigned" because this interval's lifetime ends before - // the next ref, remember it. - // For historical reasons (due to former short-circuiting of this case), if we're reassigning - // the current interval to a previous assignment, we don't remember the previous interval. - // Note that we need to compute this condition before calling unassignPhysReg, which wil reset - // assignedInterval->physReg. - bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && - (assignedInterval->physReg == foundReg); - unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); - if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) - { - assert(assignedInterval->isConstant); - refPosition->treeNode->SetReuseRegVal(); - } - else if (wasAssigned) - { - updatePreviousInterval(availablePhysRegRecord, - assignedInterval ARM_ARG(assignedInterval->registerType)); - } - else - { - assert(!regSelector->isConstAvailable()); - } + assert(!regSelector->isConstAvailable()); } } + } assignPhysReg(availablePhysRegRecord, currentInterval); refPosition->registerAssignment = foundRegBit; @@ -3258,12 +3258,8 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R // Prefer a free register that's got the earliest next use. // Otherwise, spill something with the farthest next use // -template -regNumber LinearScan::assignCopyReg(RefPosition* refPosition) +regNumber LinearScan::assignCopyRegForMinOpts(RefPosition* refPosition) { - // We will never have hasConsecutiveRegister = false, but needsConsecutiveRegisters = true - static_assert_no_msg(hasConsecutiveRegister || !needsConsecutiveRegisters); - Interval* currentInterval = refPosition->getInterval(); assert(currentInterval != nullptr); assert(currentInterval->isActive); @@ -3285,20 +3281,56 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) RegisterScore registerScore = NONE; - regNumber allocatedReg; - if (enregisterLocalVars -#ifdef TARGET_ARM64 - || hasConsecutiveRegister -#endif - ) - { - allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); - } - else - { - allocatedReg = allocateRegForMinOpts(currentInterval, refPosition DEBUG_ARG(®isterScore)); - } + regNumber allocatedReg = allocateRegForMinOpts(currentInterval, refPosition DEBUG_ARG(®isterScore)); + assert(allocatedReg != REG_NA); + + // restore the related interval + currentInterval->relatedInterval = savedRelatedInterval; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, allocatedReg, nullptr, registerScore)); + + // Now restore the old info + currentInterval->physReg = oldPhysReg; + currentInterval->assignedReg = oldRegRecord; + currentInterval->isActive = true; + + return allocatedReg; +} +// Grab a register to use to copy and then immediately use. +// This is called only for localVar intervals that already have a register +// assignment that is not compatible with the current RefPosition. +// This is not like regular assignment, because we don't want to change +// any preferences or existing register assignments. +// Prefer a free register that's got the earliest next use. +// Otherwise, spill something with the farthest next use +// +template +regNumber LinearScan::assignCopyReg(RefPosition* refPosition) +{ + Interval* currentInterval = refPosition->getInterval(); + assert(currentInterval != nullptr); + assert(currentInterval->isActive); + + // Save the relatedInterval, if any, so that it doesn't get modified during allocation. + Interval* savedRelatedInterval = currentInterval->relatedInterval; + currentInterval->relatedInterval = nullptr; + + // We don't want really want to change the default assignment, + // so 1) pretend this isn't active, and 2) remember the old reg + regNumber oldPhysReg = currentInterval->physReg; + RegRecord* oldRegRecord = currentInterval->assignedReg; + assert(oldRegRecord->regNum == oldPhysReg); + currentInterval->isActive = false; + + // We *must* allocate a register, and it will be a copyReg. Set that field now, so that + // refPosition->RegOptional() will return false. + refPosition->copyReg = true; + + RegisterScore registerScore = NONE; + + regNumber allocatedReg = + allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); assert(allocatedReg != REG_NA); // restore the related interval @@ -5266,19 +5298,19 @@ void LinearScan::allocateRegistersForMinOpt() } else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) { - currentRefPosition.registerAssignment = assignedRegBit; - if (!currentInterval->isActive) - { - currentRefPosition.reload = true; - } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); + currentRefPosition.registerAssignment = assignedRegBit; + if (!currentInterval->isActive) + { + currentRefPosition.reload = true; } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); + } else { // It's already in a register, but not one we need. if (!RefTypeIsDef(currentRefPosition.refType)) { - regNumber copyReg = assignCopyReg(¤tRefPosition); + regNumber copyReg = assignCopyRegForMinOpts(¤tRefPosition); lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); @@ -5569,13 +5601,13 @@ void LinearScan::allocateRegisters() if (enregisterLocalVars) #endif { - VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); - unsigned largeVectorVarIndex = 0; - while (largeVectorVarsIter.NextElem(&largeVectorVarIndex)) - { - Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex); - lclVarInterval->isPartiallySpilled = false; - } + VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); + unsigned largeVectorVarIndex = 0; + while (largeVectorVarsIter.NextElem(&largeVectorVarIndex)) + { + Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex); + lclVarInterval->isPartiallySpilled = false; + } } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -5933,7 +5965,7 @@ void LinearScan::allocateRegisters() else #endif { - processBlockEndAllocation(currentBlock); + processBlockEndAllocation(currentBlock); } currentBlock = moveToNextBlock(); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock)); @@ -6360,7 +6392,7 @@ void LinearScan::allocateRegisters() { // It doesn't satisfy, so do a copyReg for the first RefPosition to such a register, so // it would be possible to allocate consecutive registers to the subsequent RefPositions. - regNumber copyReg = assignCopyReg(¤tRefPosition); + regNumber copyReg = assignCopyReg(¤tRefPosition); assignConsecutiveRegisters(¤tRefPosition, copyReg); if (copyReg != assignedRegister) @@ -6452,21 +6484,15 @@ void LinearScan::allocateRegisters() regNumber copyReg; #ifdef TARGET_ARM64 - if (hasConsecutiveRegister) + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive && + currentRefPosition.refType == RefTypeUse) { - if (currentRefPosition.needsConsecutive && currentRefPosition.refType == RefTypeUse) - { - copyReg = assignCopyReg(¤tRefPosition); - } - else - { - copyReg = assignCopyReg(¤tRefPosition); - } + copyReg = assignCopyReg(¤tRefPosition); } else #endif { - copyReg = assignCopyReg(¤tRefPosition); + copyReg = assignCopyReg(¤tRefPosition); } lastAllocatedRefPosition = ¤tRefPosition; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index bee80df7fa0f6..ad3b8f5e5b83d 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1190,8 +1190,9 @@ class LinearScan : public LinearScanInterface template regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); regNumber allocateRegForMinOpts(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - template + template regNumber assignCopyReg(RefPosition* refPosition); + regNumber assignCopyRegForMinOpts(RefPosition* refPosition); bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); bool isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord); From ab744fa23b11360c6da77e34ddd3581c63c4196b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 16:37:50 -0800 Subject: [PATCH 27/33] jit format --- src/coreclr/jit/lsra.cpp | 12 ++++++------ src/coreclr/jit/lsra.h | 5 +++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 1e9d4825ec882..2bd6460f541e0 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2916,8 +2916,8 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo return false; } -regNumber LinearScan::allocateRegForMinOpts(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) +regNumber LinearScan::allocateRegForMinOpts(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { regNumber foundReg; regMaskTP foundRegBit; @@ -3026,7 +3026,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, // Note that we need to compute this condition before calling unassignPhysReg, which wil reset // assignedInterval->physReg. bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && - (assignedInterval->physReg == foundReg); + (assignedInterval->physReg == foundReg); unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) { @@ -3036,7 +3036,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, else if (wasAssigned) { updatePreviousInterval(availablePhysRegRecord, - assignedInterval ARM_ARG(assignedInterval->registerType)); + assignedInterval ARM_ARG(assignedInterval->registerType)); } else { @@ -5393,7 +5393,8 @@ void LinearScan::allocateRegistersForMinOpt() { unassignPhysReg(currentInterval->assignedReg, nullptr); } - assignedRegister = allocateRegForMinOpts(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); + assignedRegister = + allocateRegForMinOpts(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); } // If no register was found, this RefPosition must not require a register. @@ -5955,7 +5956,6 @@ void LinearScan::allocateRegisters() if (enregisterLocalVars) { processBlockEndAllocation(currentBlock); - } else { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ad3b8f5e5b83d..2f84b36efffa9 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -637,7 +637,7 @@ class LinearScan : public LinearScanInterface template void buildIntervals(); -// This is where the actual assignment is done + // This is where the actual assignment is done void allocateRegistersForMinOpt(); // This is where the actual assignment is done @@ -1189,7 +1189,8 @@ class LinearScan : public LinearScanInterface #endif template regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - regNumber allocateRegForMinOpts(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + regNumber allocateRegForMinOpts(Interval* current, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); template regNumber assignCopyReg(RefPosition* refPosition); regNumber assignCopyRegForMinOpts(RefPosition* refPosition); From 3485737fd77e63680ce7830369617debb1a57965 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 4 Jan 2024 22:49:24 -0800 Subject: [PATCH 28/33] rename from *MinOpts to *Minimal --- src/coreclr/jit/lsra.cpp | 45 ++++++++++++++++++++++++++-------------- src/coreclr/jit/lsra.h | 13 ++++++------ 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 2bd6460f541e0..db117fa56e4aa 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1413,7 +1413,7 @@ PhaseStatus LinearScan::doLinearScan() } else { - allocateRegistersForMinOpt(); + allocateRegistersMinimal(); } } @@ -2916,13 +2916,22 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo return false; } -regNumber LinearScan::allocateRegForMinOpts(Interval* currentInterval, +//------------------------------------------------------------------------ +// allocateRegMinimal: Find a register that satisfies the requirements for refPosition, +// taking into account the preferences for the given Interval, +// and possibly spilling a lower weight Interval. +// +// Note: This is a minimal version of `allocateReg()` and used when localVars are not set +// for enregistration. It simply finds the register to be assigned, if it was assigned to something +// else, then will unassign it and then assign to the currentInterval +// +regNumber LinearScan::allocateRegMinimal(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { regNumber foundReg; regMaskTP foundRegBit; RegRecord* availablePhysRegRecord; - foundRegBit = regSelector->selectMinOpts(currentInterval, refPosition DEBUG_ARG(registerScore)); + foundRegBit = regSelector->selectMinimal(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { return REG_NA; @@ -3258,7 +3267,7 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R // Prefer a free register that's got the earliest next use. // Otherwise, spill something with the farthest next use // -regNumber LinearScan::assignCopyRegForMinOpts(RefPosition* refPosition) +regNumber LinearScan::assignCopyRegMinimal(RefPosition* refPosition) { Interval* currentInterval = refPosition->getInterval(); assert(currentInterval != nullptr); @@ -3281,7 +3290,7 @@ regNumber LinearScan::assignCopyRegForMinOpts(RefPosition* refPosition) RegisterScore registerScore = NONE; - regNumber allocatedReg = allocateRegForMinOpts(currentInterval, refPosition DEBUG_ARG(®isterScore)); + regNumber allocatedReg = allocateRegMinimal(currentInterval, refPosition DEBUG_ARG(®isterScore)); assert(allocatedReg != REG_NA); // restore the related interval @@ -4831,13 +4840,13 @@ void LinearScan::freeRegisters(regMaskTP regsToFree) } //------------------------------------------------------------------------ -// LinearScan::allocateRegistersForMinOpt: Perform the actual register allocation for MinOpts by iterating over -// all of the previously constructed Intervals +// LinearScan::allocateRegistersMinimal: Perform the actual register allocation when localVars +// are not enregistered. // -void LinearScan::allocateRegistersForMinOpt() +void LinearScan::allocateRegistersMinimal() { - JITDUMP("*************** In LinearScan::allocateRegistersForMinOpt()\n"); - DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegistersForMinOpt")); + JITDUMP("*************** In LinearScan::allocateRegistersMinimal()\n"); + DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegistersMinimal")); // at start, nothing is active except for register args for (Interval& interval : intervals) @@ -5310,7 +5319,7 @@ void LinearScan::allocateRegistersForMinOpt() // It's already in a register, but not one we need. if (!RefTypeIsDef(currentRefPosition.refType)) { - regNumber copyReg = assignCopyRegForMinOpts(¤tRefPosition); + regNumber copyReg = assignCopyRegMinimal(¤tRefPosition); lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); @@ -5394,7 +5403,7 @@ void LinearScan::allocateRegistersForMinOpt() unassignPhysReg(currentInterval->assignedReg, nullptr); } assignedRegister = - allocateRegForMinOpts(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); + allocateRegMinimal(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); } // If no register was found, this RefPosition must not require a register. @@ -12411,7 +12420,7 @@ LinearScan::RegisterSelection::RegisterSelection(LinearScan* linearScan) // ---------------------------------------------------------- // reset: Resets the values of all the fields used for register selection. // -void LinearScan::RegisterSelection::resetMinOpts(Interval* interval, RefPosition* refPos) +void LinearScan::RegisterSelection::resetMinimal(Interval* interval, RefPosition* refPos) { currentInterval = interval; refPosition = refPos; @@ -13638,7 +13647,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, } // ---------------------------------------------------------- -// selectMinOpts: For given `currentInterval` and `refPosition`, selects a register to be assigned. +// selectMinimal: For given `currentInterval` and `refPosition`, selects a register to be assigned. // // Arguments: // currentInterval - Current interval for which register needs to be selected. @@ -13647,7 +13656,11 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, // Return Values: // Register bit selected (a single register) and REG_NA if no register was selected. // -regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInterval, +// Note - This routine just eliminates the busy candidates and the ones that has conflicting use and +// select the REG_ORDER heuristics (if there are any free candidates) or REG_NUM (if all registers +// are busy). +// +regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { #ifdef DEBUG @@ -13658,7 +13671,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinOpts(Interval* currentInter assert(!refPosition->needsConsecutive); #endif - resetMinOpts(currentInterval, refPosition); + resetMinimal(currentInterval, refPosition); // process data-structures if (RefTypeIsDef(refPosition->refType)) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 2f84b36efffa9..7a9578ac6cbb5 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -637,8 +637,9 @@ class LinearScan : public LinearScanInterface template void buildIntervals(); - // This is where the actual assignment is done - void allocateRegistersForMinOpt(); + // This is where the actual assignment is done for scenarios where + // no local var enregistration is done. + void allocateRegistersMinimal(); // This is where the actual assignment is done #ifdef TARGET_ARM64 @@ -1189,11 +1190,11 @@ class LinearScan : public LinearScanInterface #endif template regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - regNumber allocateRegForMinOpts(Interval* current, + regNumber allocateRegMinimal(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); template regNumber assignCopyReg(RefPosition* refPosition); - regNumber assignCopyRegForMinOpts(RefPosition* refPosition); + regNumber assignCopyRegMinimal(RefPosition* refPosition); bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); bool isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord); @@ -1267,7 +1268,7 @@ class LinearScan : public LinearScanInterface FORCEINLINE regMaskTP select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - FORCEINLINE regMaskTP selectMinOpts(Interval* currentInterval, + FORCEINLINE regMaskTP selectMinimal(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); // If the register is from unassigned set such that it was not already @@ -1355,7 +1356,7 @@ class LinearScan : public LinearScanInterface FORCEINLINE void calculateCoversSets(); FORCEINLINE void calculateUnassignedSets(); FORCEINLINE void reset(Interval* interval, RefPosition* refPosition); - FORCEINLINE void resetMinOpts(Interval* interval, RefPosition* refPosition); + FORCEINLINE void resetMinimal(Interval* interval, RefPosition* refPosition); #define REG_SEL_DEF(stat, value, shortname, orderSeqId) FORCEINLINE void try_##stat(); #define BUSY_REG_SEL_DEF(stat, value, shortname, orderSeqId) REG_SEL_DEF(stat, value, shortname, orderSeqId) From e914e3182ed8fb80c5ba1584a4a9b1db621550e6 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 9 Jan 2024 11:23:34 -0800 Subject: [PATCH 29/33] Remove unneeded method --- src/coreclr/jit/lsra.h | 7 ------- src/coreclr/jit/lsrabuild.cpp | 35 ----------------------------------- 2 files changed, 42 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 7a9578ac6cbb5..89499109d9862 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1162,13 +1162,6 @@ class LinearScan : public LinearScanInterface regMaskTP mask, unsigned multiRegIdx = 0); - // This creates a RefTypeUse at currentLoc. It sets the treeNode to nullptr if it is not a - // lclVar interval. - RefPosition* newUseRefPosition(Interval* theInterval, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx = 0); - RefPosition* newRefPosition( regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 86856b7e7b89f..ae144c95f0f4f 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -650,41 +650,6 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, return newRP; } -//--------------------------------------------------------------------------- -// newUseRefPosition: allocate and initialize a RefTypeUse RefPosition at currentLoc. -// -// Arguments: -// theInterval - interval to which RefPosition is associated with. -// theTreeNode - GenTree node for which this RefPosition is created -// mask - Set of valid registers for this RefPosition -// multiRegIdx - register position if this RefPosition corresponds to a -// multi-reg call node. -// minRegCount - Minimum number registers that needs to be ensured while -// constraining candidates for this ref position under -// LSRA stress. This is a DEBUG only arg. -// -// Return Value: -// a new RefPosition -// -// Notes: -// If the caller knows that 'theTreeNode' is NOT a candidate local, newRefPosition -// can/should be called directly. -// -RefPosition* LinearScan::newUseRefPosition(Interval* theInterval, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx) -{ - GenTree* treeNode = isCandidateLocalRef(theTreeNode) ? theTreeNode : nullptr; - - RefPosition* pos = newRefPosition(theInterval, currentLoc, RefTypeUse, treeNode, mask, multiRegIdx); - if (theTreeNode->IsRegOptional()) - { - pos->setRegOptional(true); - } - return pos; -} - //------------------------------------------------------------------------ // IsContainableMemoryOp: Checks whether this is a memory op that can be contained. // From eb4b0a1f785645176c3e1456a32053b166149585 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 17 Jan 2024 07:07:28 -0800 Subject: [PATCH 30/33] jit format --- src/coreclr/jit/lsra.cpp | 5 ++--- src/coreclr/jit/lsra.h | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index db117fa56e4aa..df41b86fa71e6 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2926,7 +2926,7 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // else, then will unassign it and then assign to the currentInterval // regNumber LinearScan::allocateRegMinimal(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { regNumber foundReg; regMaskTP foundRegBit; @@ -5402,8 +5402,7 @@ void LinearScan::allocateRegistersMinimal() { unassignPhysReg(currentInterval->assignedReg, nullptr); } - assignedRegister = - allocateRegMinimal(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); + assignedRegister = allocateRegMinimal(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); } // If no register was found, this RefPosition must not require a register. diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 89499109d9862..ad15588d5c51d 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1183,8 +1183,7 @@ class LinearScan : public LinearScanInterface #endif template regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - regNumber allocateRegMinimal(Interval* current, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + regNumber allocateRegMinimal(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); template regNumber assignCopyReg(RefPosition* refPosition); regNumber assignCopyRegMinimal(RefPosition* refPosition); From 5496c0e86040ecf35dd7be906a01e9896606ba0b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 17 Jan 2024 22:56:01 -0800 Subject: [PATCH 31/33] Add back short-circuit code --- src/coreclr/jit/lsra.cpp | 45 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index df41b86fa71e6..54da0d474f7ea 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12541,6 +12541,13 @@ void LinearScan::RegisterSelection::try_FREE() void LinearScan::RegisterSelection::try_CONST_AVAILABLE() { assert(!found); +#ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } +#endif if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { @@ -12581,6 +12588,14 @@ void LinearScan::RegisterSelection::try_COVERS() { assert(!found); +#ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } +#endif + calculateCoversSets(); found = applySelection(COVERS, coversSet & preferenceSet); @@ -12632,6 +12647,7 @@ void LinearScan::RegisterSelection::try_COVERS_RELATED() void LinearScan::RegisterSelection::try_RELATED_PREFERENCE() { assert(!found); + #ifdef DEBUG // See comments in try_FREE if (freeCandidates == RBM_NONE) @@ -12649,6 +12665,7 @@ void LinearScan::RegisterSelection::try_RELATED_PREFERENCE() void LinearScan::RegisterSelection::try_CALLER_CALLEE() { assert(!found); + #ifdef DEBUG // See comments in try_FREE if (freeCandidates == RBM_NONE) @@ -12668,6 +12685,11 @@ void LinearScan::RegisterSelection::try_UNASSIGNED() assert(!found); #ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } calculateCoversSets(); #endif @@ -12706,6 +12728,14 @@ void LinearScan::RegisterSelection::try_BEST_FIT() { assert(!found); +#ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } +#endif + regMaskTP bestFitSet = RBM_NONE; // If the best score includes COVERS_FULL, pick the one that's killed soonest. // If none cover the full range, the BEST_FIT is the one that's killed later. @@ -12776,6 +12806,13 @@ void LinearScan::RegisterSelection::try_BEST_FIT() // void LinearScan::RegisterSelection::try_IS_PREV_REG() { +#ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } +#endif // TODO: We do not check found here. if ((currentInterval->assignedReg != nullptr) && coversFullApplied) { @@ -12790,6 +12827,14 @@ void LinearScan::RegisterSelection::try_REG_ORDER() { assert(!found); +#ifdef DEBUG + // See comments in try_FREE + if (freeCandidates == RBM_NONE) + { + return; + } +#endif + // This will always result in a single candidate. That is, it is the tie-breaker // for free candidates, and doesn't make sense as anything other than the last // heuristic for free registers. From 39d3e9dc716e6543a4fe8d214ca2f630588ed5db Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 19 Jan 2024 17:16:49 -0800 Subject: [PATCH 32/33] Quick LSRA only if optimization is disabled --- src/coreclr/jit/lsra.cpp | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 54da0d474f7ea..d7348a3611a70 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1407,7 +1407,7 @@ PhaseStatus LinearScan::doLinearScan() else #endif // TARGET_ARM64 { - if (enregisterLocalVars) + if (enregisterLocalVars || compiler->opts.OptimizationEnabled()) { allocateRegisters(); } @@ -2928,6 +2928,7 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo regNumber LinearScan::allocateRegMinimal(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { + assert(!enregisterLocalVars); regNumber foundReg; regMaskTP foundRegBit; RegRecord* availablePhysRegRecord; @@ -4845,6 +4846,7 @@ void LinearScan::freeRegisters(regMaskTP regsToFree) // void LinearScan::allocateRegistersMinimal() { + assert(!enregisterLocalVars); JITDUMP("*************** In LinearScan::allocateRegistersMinimal()\n"); DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegistersMinimal")); @@ -5604,11 +5606,7 @@ void LinearScan::allocateRegisters() } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE -#ifdef TARGET_ARM64 - // allocateRegisters() can be called even if enregisterLocalVars= false, but - // method needs consecutive registers. if (enregisterLocalVars) -#endif { VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); unsigned largeVectorVarIndex = 0; @@ -5958,22 +5956,13 @@ void LinearScan::allocateRegisters() } else { -#ifdef TARGET_ARM64 - if (hasConsecutiveRegister) + if (enregisterLocalVars) { - if (enregisterLocalVars) - { - processBlockEndAllocation(currentBlock); - } - else - { - processBlockEndAllocation(currentBlock); - } + processBlockEndAllocation(currentBlock); } else -#endif { - processBlockEndAllocation(currentBlock); + processBlockEndAllocation(currentBlock); } currentBlock = moveToNextBlock(); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock)); @@ -12390,7 +12379,7 @@ LinearScan::RegisterSelection::RegisterSelection(LinearScan* linearScan) { ordering = W("ABCDEFGHIJKLMNOPQ"); - if (!linearScan->enregisterLocalVars) + if (!linearScan->enregisterLocalVars && linearScan->compiler->opts.OptimizationDisabled()) { ordering = W("MQQQQQQQQQQQQQQQQ"); } @@ -13707,6 +13696,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { + assert(!linearScan->enregisterLocalVars); #ifdef DEBUG *registerScore = NONE; #endif From 55f5b4b7adb289a5d7f55d0cd4fdd4150c10f5fb Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 25 Jan 2024 15:03:53 -0800 Subject: [PATCH 33/33] Add verifyFreeRegisters() --- src/coreclr/jit/lsra.cpp | 325 +++++++++++++++------------------------ src/coreclr/jit/lsra.h | 1 + 2 files changed, 122 insertions(+), 204 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d7348a3611a70..37a2c4b7883a1 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3260,13 +3260,18 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R return canSpill; } -// Grab a register to use to copy and then immediately use. +// ---------------------------------------------------------- +// assignCopyRegMinimal: Grab a register to use to copy and then immediately use. // This is called only for localVar intervals that already have a register // assignment that is not compatible with the current RefPosition. // This is not like regular assignment, because we don't want to change // any preferences or existing register assignments. -// Prefer a free register that's got the earliest next use. -// Otherwise, spill something with the farthest next use +// +// Arguments: +// refPosition - Refposition within the interval for which register needs to be selected. +// +// Return Values: +// Register bit selected (a single register) and REG_NA if no register was selected. // regNumber LinearScan::assignCopyRegMinimal(RefPosition* refPosition) { @@ -5004,103 +5009,7 @@ void LinearScan::allocateRegistersMinimal() delayRegsToFree = RBM_NONE; #ifdef DEBUG - // Validate the current state just after we've freed the registers. This ensures that any pending - // freed registers will have had their state updated to reflect the intervals they were holding. - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) - { - regMaskTP regMask = genRegMask(reg); - // If this isn't available or if it's still waiting to be freed (i.e. it was in - // delayRegsToFree and so now it's in regsToFree), then skip it. - if ((regMask & allAvailableRegs & ~regsToFree) == RBM_NONE) - { - continue; - } - RegRecord* physRegRecord = getRegisterRecord(reg); - Interval* assignedInterval = physRegRecord->assignedInterval; - if (assignedInterval != nullptr) - { - bool isAssignedReg = (assignedInterval->physReg == reg); - RefPosition* recentRefPosition = assignedInterval->recentRefPosition; - // If we have a copyReg or a moveReg, we might have assigned this register to an Interval, - // but that isn't considered its assignedReg. - if (recentRefPosition != nullptr) - { - // For copyReg or moveReg, we don't have anything further to assert. - if (recentRefPosition->copyReg || recentRefPosition->moveReg) - { - continue; - } - assert(assignedInterval->isConstant == isRegConstant(reg, assignedInterval->registerType)); - if (assignedInterval->isActive) - { - // If this is not the register most recently allocated, it must be from a copyReg, - // it was placed there by the inVarToRegMap or it might be one of the upper vector - // save/restore refPosition. - // In either case it must be a lclVar. - - if (!isAssignedToInterval(assignedInterval, physRegRecord)) - { - // We'd like to assert that this was either set by the inVarToRegMap, or by - // a copyReg, but we can't traverse backward to check for a copyReg, because - // we only have recentRefPosition, and there may be a previous RefPosition - // at the same Location with a copyReg. - - bool sanityCheck = assignedInterval->isLocalVar; - // For upper vector interval, make sure it was one of the save/restore only. - if (assignedInterval->IsUpperVector()) - { - sanityCheck |= (recentRefPosition->refType == RefTypeUpperVectorSave) || - (recentRefPosition->refType == RefTypeUpperVectorRestore); - } - - assert(sanityCheck); - } - if (isAssignedReg) - { - assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); - assert(!isRegAvailable(reg, assignedInterval->registerType)); - assert((recentRefPosition == nullptr) || - (spillCost[reg] == getSpillWeight(physRegRecord))); - } - else - { - assert((nextIntervalRef[reg] == MaxLocation) || - isRegBusy(reg, assignedInterval->registerType)); - } - } - else - { - if ((assignedInterval->physReg == reg) && !assignedInterval->isConstant) - { - assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); - } - else - { - assert(nextIntervalRef[reg] == MaxLocation); - assert(isRegAvailable(reg, assignedInterval->registerType)); - assert(spillCost[reg] == 0); - } - } - } - } - else - { - // Available registers should not hold constants - assert(isRegAvailable(reg, physRegRecord->registerType)); - assert(!isRegConstant(reg, physRegRecord->registerType) || spillAlways()); - assert(nextIntervalRef[reg] == MaxLocation); - assert(spillCost[reg] == 0); - } - LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); - assert(nextFixedRef[reg] == thisNextFixedRef); -#ifdef TARGET_ARM - // If this is occupied by a double interval, skip the corresponding float reg. - if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) - { - reg = REG_NEXT(reg); - } -#endif - } + verifyFreeRegisters(regsToFree); #endif // DEBUG } } @@ -5783,112 +5692,9 @@ void LinearScan::allocateRegisters() } regsToFree = delayRegsToFree; delayRegsToFree = RBM_NONE; - #ifdef DEBUG - // Validate the current state just after we've freed the registers. This ensures that any pending - // freed registers will have had their state updated to reflect the intervals they were holding. - for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) - { - regMaskTP regMask = genRegMask(reg); - // If this isn't available or if it's still waiting to be freed (i.e. it was in - // delayRegsToFree and so now it's in regsToFree), then skip it. - if ((regMask & allAvailableRegs & ~regsToFree) == RBM_NONE) - { - continue; - } - RegRecord* physRegRecord = getRegisterRecord(reg); - Interval* assignedInterval = physRegRecord->assignedInterval; - if (assignedInterval != nullptr) - { - bool isAssignedReg = (assignedInterval->physReg == reg); - RefPosition* recentRefPosition = assignedInterval->recentRefPosition; - // If we have a copyReg or a moveReg, we might have assigned this register to an Interval, - // but that isn't considered its assignedReg. - if (recentRefPosition != nullptr) - { - if (recentRefPosition->refType == RefTypeExpUse) - { - // We don't update anything on these, as they're just placeholders to extend the - // lifetime. - continue; - } - // For copyReg or moveReg, we don't have anything further to assert. - if (recentRefPosition->copyReg || recentRefPosition->moveReg) - { - continue; - } - assert(assignedInterval->isConstant == isRegConstant(reg, assignedInterval->registerType)); - if (assignedInterval->isActive) - { - // If this is not the register most recently allocated, it must be from a copyReg, - // it was placed there by the inVarToRegMap or it might be one of the upper vector - // save/restore refPosition. - // In either case it must be a lclVar. - - if (!isAssignedToInterval(assignedInterval, physRegRecord)) - { - // We'd like to assert that this was either set by the inVarToRegMap, or by - // a copyReg, but we can't traverse backward to check for a copyReg, because - // we only have recentRefPosition, and there may be a previous RefPosition - // at the same Location with a copyReg. - - bool sanityCheck = assignedInterval->isLocalVar; - // For upper vector interval, make sure it was one of the save/restore only. - if (assignedInterval->IsUpperVector()) - { - sanityCheck |= (recentRefPosition->refType == RefTypeUpperVectorSave) || - (recentRefPosition->refType == RefTypeUpperVectorRestore); - } - - assert(sanityCheck); - } - if (isAssignedReg) - { - assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); - assert(!isRegAvailable(reg, assignedInterval->registerType)); - assert((recentRefPosition == nullptr) || - (spillCost[reg] == getSpillWeight(physRegRecord))); - } - else - { - assert((nextIntervalRef[reg] == MaxLocation) || - isRegBusy(reg, assignedInterval->registerType)); - } - } - else - { - if ((assignedInterval->physReg == reg) && !assignedInterval->isConstant) - { - assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); - } - else - { - assert(nextIntervalRef[reg] == MaxLocation); - assert(isRegAvailable(reg, assignedInterval->registerType)); - assert(spillCost[reg] == 0); - } - } - } - } - else - { - // Available registers should not hold constants - assert(isRegAvailable(reg, physRegRecord->registerType)); - assert(!isRegConstant(reg, physRegRecord->registerType) || spillAlways()); - assert(nextIntervalRef[reg] == MaxLocation); - assert(spillCost[reg] == 0); - } - LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); - assert(nextFixedRef[reg] == thisNextFixedRef); -#ifdef TARGET_ARM - // If this is occupied by a double interval, skip the corresponding float reg. - if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) - { - reg = REG_NEXT(reg); - } + verifyFreeRegisters(regsToFree); #endif - } -#endif // DEBUG } } prevLocation = currentLocation; @@ -11742,6 +11548,117 @@ bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node) } } +// verifyFreeRegisters: Validate the current state just after we've freed the registers. +// This ensures that any pending freed registers will have had their state updated to +// reflect the intervals they were holding. +// +// Arguments: +// regsToFree - Registers that were just freed. +// +void LinearScan::verifyFreeRegisters(regMaskTP regsToFree) +{ + for (regNumber reg = REG_FIRST; reg < AVAILABLE_REG_COUNT; reg = REG_NEXT(reg)) + { + regMaskTP regMask = genRegMask(reg); + // If this isn't available or if it's still waiting to be freed (i.e. it was in + // delayRegsToFree and so now it's in regsToFree), then skip it. + if ((regMask & allAvailableRegs & ~regsToFree) == RBM_NONE) + { + continue; + } + RegRecord* physRegRecord = getRegisterRecord(reg); + Interval* assignedInterval = physRegRecord->assignedInterval; + if (assignedInterval != nullptr) + { + bool isAssignedReg = (assignedInterval->physReg == reg); + RefPosition* recentRefPosition = assignedInterval->recentRefPosition; + // If we have a copyReg or a moveReg, we might have assigned this register to an Interval, + // but that isn't considered its assignedReg. + if (recentRefPosition != nullptr) + { + if (recentRefPosition->refType == RefTypeExpUse) + { + // We don't update anything on these, as they're just placeholders to extend the + // lifetime. + continue; + } + + // For copyReg or moveReg, we don't have anything further to assert. + if (recentRefPosition->copyReg || recentRefPosition->moveReg) + { + continue; + } + assert(assignedInterval->isConstant == isRegConstant(reg, assignedInterval->registerType)); + if (assignedInterval->isActive) + { + // If this is not the register most recently allocated, it must be from a copyReg, + // it was placed there by the inVarToRegMap or it might be one of the upper vector + // save/restore refPosition. + // In either case it must be a lclVar. + + if (!isAssignedToInterval(assignedInterval, physRegRecord)) + { + // We'd like to assert that this was either set by the inVarToRegMap, or by + // a copyReg, but we can't traverse backward to check for a copyReg, because + // we only have recentRefPosition, and there may be a previous RefPosition + // at the same Location with a copyReg. + + bool sanityCheck = assignedInterval->isLocalVar; + // For upper vector interval, make sure it was one of the save/restore only. + if (assignedInterval->IsUpperVector()) + { + sanityCheck |= (recentRefPosition->refType == RefTypeUpperVectorSave) || + (recentRefPosition->refType == RefTypeUpperVectorRestore); + } + + assert(sanityCheck); + } + if (isAssignedReg) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + assert(!isRegAvailable(reg, assignedInterval->registerType)); + assert((recentRefPosition == nullptr) || (spillCost[reg] == getSpillWeight(physRegRecord))); + } + else + { + assert((nextIntervalRef[reg] == MaxLocation) || isRegBusy(reg, assignedInterval->registerType)); + } + } + else + { + if ((assignedInterval->physReg == reg) && !assignedInterval->isConstant) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + } + else + { + assert(nextIntervalRef[reg] == MaxLocation); + assert(isRegAvailable(reg, assignedInterval->registerType)); + assert(spillCost[reg] == 0); + } + } + } + } + else + { + // Available registers should not hold constants + assert(isRegAvailable(reg, physRegRecord->registerType)); + assert(!isRegConstant(reg, physRegRecord->registerType) || spillAlways()); + assert(nextIntervalRef[reg] == MaxLocation); + assert(spillCost[reg] == 0); + } + LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); + assert(nextFixedRef[reg] == thisNextFixedRef); +#ifdef TARGET_ARM + // If this is occupied by a double interval, skip the corresponding float reg. + if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) + { + reg = REG_NEXT(reg); + } +#endif + } +} + //------------------------------------------------------------------------ // verifyFinalAllocation: Traverse the RefPositions and verify various invariants. // diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ad15588d5c51d..c0e0f5d2fdbd3 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -940,6 +940,7 @@ class LinearScan : public LinearScanInterface static bool IsResolutionMove(GenTree* node); static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node); + void verifyFreeRegisters(regMaskTP regsToFree); void verifyFinalAllocation(); void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation); #else // !DEBUG