Commit fd002fde, authored by Wei Pan, committed by gbsbuild

Relax vISA to compile with user specified number of registers.

A few FIXMEs remain to be revisited.

Change-Id: I13e450114effaee78a6d9066960c7ac6f83a278b
parent 317b55c0
......@@ -3770,6 +3770,10 @@ void CEncoder::InitEncoder( bool canAbortOnSpill, bool hasStackCall )
{
vbuilder->SetOption(vISA_ReservedGRFNum, IGC_GET_FLAG_VALUE(ReservedRegisterNum));
}
if (IGC_GET_FLAG_VALUE(GRFNumToUse) > 0)
{
vbuilder->SetOption(vISA_GRFNumToUse, IGC_GET_FLAG_VALUE(GRFNumToUse));
}
vbuilder->SetOption(vISA_TotalGRFNum, context->getNumGRFPerThread());
......
......@@ -48,10 +48,11 @@ DECLARE_IGC_REGKEY(DWORD,UnifiedSendCycle, 0, "Using unified se
DECLARE_IGC_REGKEY(DWORD,DisableMixMode, 0, "Disables mix mode in vISA BE.")
DECLARE_IGC_REGKEY(DWORD,DisableHFMath, 0, "Disables HF math instructions.")
DECLARE_IGC_REGKEY(debugString, VISAOptions, 0, "Options to vISA. Space-separated options.")
DECLARE_IGC_REGKEY(DWORD,ReservedRegisterNum, 0, "Reserve regsiter number for spill cost testing.")
DECLARE_IGC_REGKEY(DWORD,disableIGASyntax, false, "Disables GEN isa text output using IGA and new syntax.")
DECLARE_IGC_REGKEY(DWORD,disableCompaction, false, "Disables compaction.")
DECLARE_IGC_REGKEY(DWORD,TotalGRFNum, 0, "Total GRF used for register allocation.")
// Forwarded to the vISA_ReservedGRFNum option by CEncoder::InitEncoder.
DECLARE_IGC_REGKEY(DWORD,ReservedRegisterNum, 0, "Reserve register number for spill cost testing.")
// Forwarded to the vISA_GRFNumToUse option by CEncoder::InitEncoder, but only
// when the value is greater than 0; the default of 0 leaves the option unset.
DECLARE_IGC_REGKEY(DWORD, GRFNumToUse, 0, "Set the number of general registers to use (64 to totalGRFNum)")
DECLARE_IGC_REGKEY(bool, ExpandPlane, 0, "Enable pln to mad macro expansion.")
DECLARE_IGC_REGKEY(bool, EnableBCR, false, "Enable bank conflict reduction.")
DECLARE_IGC_REGKEY(bool, GlobalSendVarSplit, false, "Enable global send variable splitting when we are about to spill")
......
......@@ -706,7 +706,7 @@ inline void BinaryEncoding::EncodeDstRegNum(G4_INST* inst, BinInst *mybin, G4_Ds
{
uint32_t byteAddress = dst->getLinearizedStart();
MUST_BE_TRUE(byteAddress < kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum) * GENX_GRF_REG_SIZ, "dst exceeds total GRF number");
MUST_BE_TRUE(byteAddress < kernel.getNumRegTotal() * GENX_GRF_REG_SIZ, "dst exceeds total GRF number");
if (inst->isAligned1Inst())
{
......@@ -1386,7 +1386,7 @@ inline void BinaryEncoding::EncodeSrc0RegNum(G4_INST* inst, BinInst *mybin, G4_O
{
bool repControl = EncodingHelper::GetRepControl(src0);
uint32_t byteAddress = src0->getLinearizedStart();
MUST_BE_TRUE(byteAddress < kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum) * GENX_GRF_REG_SIZ, "src0 exceeds total GRF number");
MUST_BE_TRUE(byteAddress < kernel.getNumRegTotal() * GENX_GRF_REG_SIZ, "src0 exceeds total GRF number");
if (mybin->GetIs3Src())
{
......@@ -2083,7 +2083,7 @@ inline void BinaryEncoding::EncodeSrc1RegNum(G4_INST *inst, BinInst *mybin, G4_O
{
bool repControl = EncodingHelper::GetRepControl(src1);
uint32_t byteAddress = src1->getLinearizedStart();
MUST_BE_TRUE(byteAddress < kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum) * GENX_GRF_REG_SIZ, "src1 exceeds total GRF number");
MUST_BE_TRUE(byteAddress < kernel.getNumRegTotal() * GENX_GRF_REG_SIZ, "src1 exceeds total GRF number");
if (mybin->GetIs3Src())
{
......@@ -2546,7 +2546,7 @@ inline void BinaryEncoding::EncodeSrc2RegNum(G4_INST* inst, BinInst *mybin, G4_O
EncodingHelper::GetSrcAddrMode(src2) == ADDR_MODE_IMMED )
{
uint32_t byteAddress = src2->getLinearizedStart();
MUST_BE_TRUE(byteAddress < kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum) * GENX_GRF_REG_SIZ, "src2 exceeds total GRF number");
MUST_BE_TRUE(byteAddress < kernel.getNumRegTotal() * GENX_GRF_REG_SIZ, "src2 exceeds total GRF number");
// encode dwords
mybin->SetBits(bits3SrcSrc2RegDWord_H, bits3SrcSrc2RegDWord_L, byteAddress >> 2);
......
......@@ -5615,14 +5615,14 @@ unsigned int G4_Kernel::calleeSaveStart()
// Returns the register number at which the scratch registers begin, computed
// as the kernel's total register count minus getNumScratchRegs().
unsigned int G4_Kernel::getStackCallStartReg()
{
// Last 3 GRFs to be used as scratch
// NOTE(review): the "3" above presumably matches getNumScratchRegs(); confirm.
// NOTE(review): the two totalGRFs lines below look like the pre-change and
// post-change forms of the same statement as shown by this change view; only
// one of them is expected to exist in the real file.
unsigned int totalGRFs = getOptions()->getuInt32Option(vISA_TotalGRFNum);
unsigned int totalGRFs = getNumRegTotal();
unsigned int startReg = totalGRFs - getNumScratchRegs();
return startReg;
}
// Returns the number of callee-saved registers, computed as
// total register count - calleeSaveStart() - getNumScratchRegs().
unsigned int G4_Kernel::getNumCalleeSaveRegs()
{
// NOTE(review): the two totalGRFs lines below look like the pre-change and
// post-change forms of the same statement as shown by this change view; only
// one of them is expected to exist in the real file.
unsigned int totalGRFs = getOptions()->getuInt32Option(vISA_TotalGRFNum);
unsigned int totalGRFs = getNumRegTotal();
// All operands are unsigned: the result wraps around if calleeSaveStart() plus
// getNumScratchRegs() exceeds the total register count.
return totalGRFs - calleeSaveStart() - getNumScratchRegs();
}
......
......@@ -388,6 +388,7 @@ public:
}
FlowGraph& getParent() const { return *parent; }
G4_Kernel& getKernel() const;
void addToBBList(int key, G4_BB* b){BBlist[key] = b;}
void clearBBList(){BBlist.clear();}
bool existsInBBList(int key){ return BBlist.find(key) != BBlist.end();}
......@@ -1315,7 +1316,17 @@ public:
major < COMMON_ISA_MAJOR_VER ||
(major == COMMON_ISA_MAJOR_VER && minor <= COMMON_ISA_MINOR_VER),
"CISA version not supported by this JIT-compiler");
numRegTotal = UNDEFINED_VAL;
unsigned totalGRFs = options->getuInt32Option(vISA_TotalGRFNum);
unsigned Val = options->getuInt32Option(vISA_GRFNumToUse);
if (Val > 0)
{
numRegTotal = std::min(Val, totalGRFs);
}
else
{
numRegTotal = totalGRFs;
}
name = NULL;
simdSize = 0;
hasAddrTaken = false;
......@@ -1330,7 +1341,6 @@ public:
gtPinInfo = nullptr;
}
unsigned int totalGRFs = options->getuInt32Option(vISA_TotalGRFNum);
callerSaveLastGRF = ((totalGRFs - 8) / 2) - 1;
}
......@@ -1396,11 +1406,12 @@ public:
void setHasAddrTaken(bool val) { hasAddrTaken = val; }
bool getHasAddrTaken() { return hasAddrTaken; }
void setNumRegTotal(unsigned num) {numRegTotal = num;}
void setName(const char* n) {name = n;}
const char* getName() {return name;}
const char* getOrigCMName() {return name + 2;}
unsigned getNumRegTotal() {return numRegTotal;}
// Overwrites the stored register count that getNumRegTotal() reports.
void setNumRegTotal(unsigned num) { numRegTotal = num; }
// Returns the stored register count (numRegTotal). Callers in the register
// allocator, scheduler and encoder use it as the number of GRFs available to
// this kernel.
unsigned getNumRegTotal() const { return numRegTotal; }
// Stores the pointer as given; the string itself is not copied, so it must
// outlive every later use of getName() / getOrigCMName().
void setName(const char* n) { name = n; }
// Returns the pointer last stored by setName().
// NOTE(review): presumably null until setName() has been called; confirm.
const char* getName() { return name; }
// Returns the stored name with its first two characters skipped.
// NOTE(review): what the two-character prefix is cannot be seen from here;
// the name must be non-null and at least two characters long for this to be valid.
const char* getOrigCMName() { return name + 2; }
void emit_asm(std::ostream& output, bool beforeRegAlloc, void * binary, uint32_t binarySize);
void emit_dep(std::ostream& output);
......@@ -1499,6 +1510,11 @@ public:
};
// Resolves the kernel this basic block belongs to by asking the flow graph
// that owns the block.
inline G4_Kernel& G4_BB::getKernel() const
{
    FlowGraph& owningGraph = getParent();
    return *owningGraph.getKernel();
}
class SCCAnalysis
{
//
......
......@@ -232,10 +232,13 @@ void G4Verifier::verifySend(G4_INST* inst)
}
return src->getLinearizedStart() >= EOTStart;
};
MUST_BE_TRUE(checkEOTSrc(src0), "src0 for EOT send is not in r112-r127");
if (src1 != nullptr)
if (kernel.getNumRegTotal() >= 128)
{
MUST_BE_TRUE(checkEOTSrc(src1), "src1 for EOT sends is not in r112-r127");
MUST_BE_TRUE(checkEOTSrc(src0), "src0 for EOT send is not in r112-r127");
if (src1 != nullptr)
{
MUST_BE_TRUE(checkEOTSrc(src1), "src1 for EOT sends is not in r112-r127");
}
}
}
......
......@@ -2183,16 +2183,14 @@ void Interference::buildInterferenceWithinBB(G4_BB* bb, BitSet& live)
markInterferenceForSend(bb, inst, dst);
}
// FIXME: revisit this restriction.
//r127 must not be used for return address when there is a src and dest overlap in send instruction.
if (kernel.fg.builder->needsToReserveR127() && liveAnalysis->livenessClass(G4_GRF) && !inst->isSplitSend())
{
if (dst->getBase()->isRegAllocPartaker() && !dst->getBase()->asRegVar()->isPhyRegAssigned())
{
int dstId = dst->getBase()->asRegVar()->getId();
if (kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum) == 128)
{
lrs[dstId]->markForbidden(127, 1);
}
lrs[dstId]->markForbidden(kernel.getNumRegTotal() - 1, 1);
}
}
}
......@@ -2250,7 +2248,7 @@ void Interference::buildInterferenceWithinBB(G4_BB* bb, BitSet& live)
lrs[id]->setEOTSrc();
if (builder.hasEOTGRFBinding())
{
lrs[id]->markForbidden(0, kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum) - 16);
lrs[id]->markForbidden(0, kernel.getNumRegTotal() - 16);
}
}
......@@ -5563,7 +5561,7 @@ bool GraphColor::assignColors(ColorHeuristic colorHeuristicGRF, bool doBankConfl
unsigned bank2_end = totalGRFRegCount - 1;
unsigned bank1_start = 0;
unsigned bank2_start = totalGRFRegCount - 1;
unsigned int totalGRFNum = getOptions()->getuInt32Option(vISA_TotalGRFNum);
unsigned int totalGRFNum = kernel.getNumRegTotal();
bool oneGRFBankDivision = gra.kernel.fg.builder->oneGRFBankDivision();
bool allocFromBanks = liveAnalysis.livenessClass(G4_GRF) && builder.lowHighBundle() &&
!builder.getOptions()->getuInt32Option(vISA_ReservedGRFNum) &&
......@@ -6039,7 +6037,7 @@ bool GraphColor::regAlloc(bool doBankConflictReduction,
{
gra.determineSpillRegSize(spillRegSize, indrSpillRegSize);
reserveSpillSize = spillRegSize + indrSpillRegSize;
MUST_BE_TRUE(reserveSpillSize < getOptions()->getuInt32Option(vISA_TotalGRFNum), "Invalid reserveSpillSize in fail-safe RA!");
MUST_BE_TRUE(reserveSpillSize < kernel.getNumCalleeSaveRegs(), "Invalid reserveSpillSize in fail-safe RA!");
totalGRFRegCount -= reserveSpillSize;
}
......@@ -10851,7 +10849,7 @@ void FlagSpillCleanup::spillFillCodeCleanFlag(IR_Builder& builder,
// this is needed for HRA, and the fake declares will be removed at the end of HRA
void GlobalRA::insertPhyRegDecls()
{
int numGRF = kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum);
int numGRF = kernel.getNumRegTotal();
std::vector<bool> grfUsed;
grfUsed.resize(numGRF, false);
GRFDclsForHRA.resize(numGRF);
......
......@@ -48,7 +48,7 @@ using namespace vISA;
#define GET_BUNDLE(r, o) (((r + o) % 64) / 4)
extern unsigned int getStackCallRegSize(bool reserveStackCallRegs);
extern void getForbiddenGRFs(vector<unsigned int>& regNum, const Options *opt, unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned reservedRegNum);
extern void getForbiddenGRFs(vector<unsigned int>& regNum, G4_Kernel& kernel, unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned reservedRegNum);
extern void getCallerSaveGRF(vector<unsigned int>& regNum, G4_Kernel* kernel);
LocalRA::LocalRA(G4_Kernel& k, bool& h, BankConflictPass& b, GlobalRA& g) :
......@@ -174,7 +174,7 @@ void LocalRA::preLocalRAAnalysis()
unsigned int numRowsEOT = 0;
bool lifetimeOpFound = false;
int numGRF = kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum);
int numGRF = kernel.getNumRegTotal();
// Mark references made to decls to sieve local from global ranges
markReferences(numRowsEOT, lifetimeOpFound);
......@@ -205,13 +205,14 @@ void LocalRA::preLocalRAAnalysis()
if (isStackCall || reservedGRFNum || builder.getOption(vISA_Debug))
{
vector<unsigned int> forbiddenRegs;
getForbiddenGRFs(forbiddenRegs, builder.getOptions(), stackCallRegSize, 0, reservedGRFNum);
getForbiddenGRFs(forbiddenRegs, kernel, stackCallRegSize, 0, reservedGRFNum);
for (unsigned int i = 0; i < forbiddenRegs.size(); i++)
{
unsigned int regNum = forbiddenRegs[i];
pregs->setGRFUnavailable(regNum);
}
// FIXME: this will break if # of GRF is not 128.
if (isStackCall)
{
// Set r60 to r99 as unavailable for local RA since these registers are callee saved
......@@ -361,7 +362,7 @@ bool LocalRA::localRAPass(bool doRoundRobin, bool doBankConflictReduction, bool
printInputLiveIntervals();
#endif
int totalGRFNum = builder.getOptions()->getuInt32Option(vISA_TotalGRFNum);
int totalGRFNum = kernel.getNumRegTotal();
for (BB_LIST_ITER bb_it = kernel.fg.BBs.begin(); bb_it != kernel.fg.BBs.end(); ++bb_it)
{
PhyRegsManager pregManager(localPregs, doBankConflictReduction);
......@@ -392,7 +393,10 @@ bool LocalRA::localRAPass(bool doRoundRobin, bool doBankConflictReduction, bool
printLocalLiveIntervals(curBB, liveIntervals);
#endif
LinearScan ra(gra, builder, liveIntervals, inputIntervals, pregManager, localPregs, mem, summary, numRegLRA, globalLRSize, doRoundRobin, doBankConflictReduction, highInternalConflict, doSplitLLR, kernel.getSimdSize());
LinearScan ra(gra, liveIntervals, inputIntervals, pregManager,
localPregs, mem, summary, numRegLRA, globalLRSize,
doRoundRobin, doBankConflictReduction,
highInternalConflict, doSplitLLR, kernel.getSimdSize());
ra.run(curBB, builder, LLRUseMap);
#ifdef DEBUG_VERBOSE_ON
......@@ -464,7 +468,7 @@ bool LocalRA::localRA(bool& doRoundRobin, bool& doBankConflict)
std::cout << "--local RA--\n";
}
int numGRF = kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum);
int numGRF = kernel.getNumRegTotal();
PhyRegsLocalRA phyRegs(numGRF);
pregs = &phyRegs;
......@@ -1307,7 +1311,7 @@ void LocalRA::calculateInputIntervals()
{
setLexicalID();
int numGRF = kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum);
int numGRF = kernel.getNumRegTotal();
std::vector<uint32_t> inputRegLastRef;
inputRegLastRef.resize(numGRF * G4_GRF_REG_SIZE, UINT_MAX);
......@@ -2407,6 +2411,63 @@ void PhyRegsManager::freeRegs(int regnum, int subregnum, int numwords, int instI
// ********* LinearScan class implementation *********
// Builds a linear-scan allocator.
//
// Besides storing its arguments, the constructor
//   * sizes the `pregs` bitset to (kernel register count * NUM_WORDS_PER_GRF),
//   * computes the register-number boundaries of the two banks, split at
//     SECOND_HALF_BANK_START_GRF and adjusted by the global live-range size,
//   * points the allocation cursor at the start of bank 1, and
//   * records in the register manager how many GRFs of each bank are
//     currently available and not busy.
LinearScan::LinearScan(GlobalRA& g, std::vector<LocalLiveRange*>& localLiveIntervals,
    std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>>& inputLivelIntervals,
    PhyRegsManager& pregMgr, PhyRegsLocalRA& pregs, Mem_Manager& memmgr, PhyRegSummary* s,
    unsigned int numReg, unsigned int glrs, bool roundRobin, bool bankConflict,
    bool internalConflict, bool splitLLR, unsigned int simdS) :
    gra(g),
    builder(g.builder),
    mem(memmgr),
    pregManager(pregMgr),
    initPregs(pregs),
    liveIntervals(localLiveIntervals),
    inputIntervals(inputLivelIntervals),
    summary(s),
    pregs(g.kernel.getNumRegTotal() * NUM_WORDS_PER_GRF, false),
    simdSize(simdS),
    globalLRSize(glrs),
    numRegLRA(numReg),
    useRoundRobin(roundRobin),
    doBankConflict(bankConflict),
    highInternalConflict(internalConflict),
    doSplitLLR(splitLLR)
{
    // Bank 1 starts at r0 and ends below the middle of the register file,
    // pulled back by half of the global live-range size.
    bank1_start = 0;
    bank1_end = SECOND_HALF_BANK_START_GRF - globalLRSize / 2 - 1;

    // Bank 2 lies between this inner boundary and the last register usable
    // by local RA. Round-robin walks it from the middle to the back;
    // otherwise it is walked from the back to the middle.
    const auto bank2InnerBound = SECOND_HALF_BANK_START_GRF + (globalLRSize + 1) / 2;
    bank2_start = useRoundRobin ? bank2InnerBound : numRegLRA - 1;
    bank2_end = useRoundRobin ? numRegLRA - 1 : bank2InnerBound;

    // Per-bank cursors; allocation begins in bank 1.
    bank1StartGRFReg = bank1_start;
    bank2StartGRFReg = bank2_start;
    startGRFReg = &bank1StartGRFReg;

    PhyRegsLocalRA* const availRegs = pregManager.getAvaialableRegs();

    // Count the registers of the first half that are available and not busy.
    int freeInBank1 = 0;
    for (int reg = 0; reg < SECOND_HALF_BANK_START_GRF; ++reg)
    {
        if (!availRegs->isGRFAvailable(reg) || availRegs->isGRFBusy(reg))
        {
            continue;
        }
        ++freeInBank1;
    }
    availRegs->setBank1AvailableRegNum(freeInBank1);

    // Same count for the second half, up to the local-RA register limit.
    int freeInBank2 = 0;
    for (unsigned int reg = SECOND_HALF_BANK_START_GRF; reg < numRegLRA; ++reg)
    {
        if (!availRegs->isGRFAvailable(reg) || availRegs->isGRFBusy(reg))
        {
            continue;
        }
        ++freeInBank2;
    }
    availRegs->setBank2AvailableRegNum(freeInBank2);
}
// Linear scan implementation
void LinearScan::run(G4_BB* bb, IR_Builder& builder, LLR_USE_MAP& LLRUseMap)
{
......@@ -3055,7 +3116,7 @@ bool LinearScan::allocateRegsFromBanks(LocalLiveRange* lr)
if (*startGRFReg < SECOND_HALF_BANK_START_GRF)
{
*startGRFReg += bank2_start;
if (*startGRFReg >= builder.getOptions()->getuInt32Option(vISA_TotalGRFNum))
if (*startGRFReg >= gra.kernel.getNumRegTotal())
{
*startGRFReg = bank2_start;
return false;
......
......@@ -445,33 +445,32 @@ public:
PhyRegsLocalRA * getAvaialableRegs() { return &availableRegs; }
};
class LinearScan
{
class LinearScan {
private:
GlobalRA& gra;
IR_Builder& builder;
Mem_Manager& mem;
PhyRegsManager& pregManager;
Mem_Manager& mem;
PhyRegsManager& pregManager;
PhyRegsLocalRA& initPregs;
std::vector<LocalLiveRange*>& liveIntervals;
std::vector<LocalLiveRange*>& liveIntervals;
std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>>& inputIntervals;
std::list<LocalLiveRange*> active;
PhyRegSummary* summary;
std::list<LocalLiveRange*> active;
PhyRegSummary* summary;
void expireRanges( unsigned int );
void expireInputRanges( unsigned int, unsigned int, unsigned int );
void expireAllActive();
bool allocateRegsFromBanks( LocalLiveRange* );
void expireRanges(unsigned int);
void expireInputRanges(unsigned int, unsigned int, unsigned int);
void expireAllActive();
bool allocateRegsFromBanks(LocalLiveRange*);
bool allocateRegs(LocalLiveRange*, G4_BB* bb, IR_Builder& builder, LLR_USE_MAP& LLRUseMap);
void freeAllocedRegs( LocalLiveRange*, bool);
void updateActiveList( LocalLiveRange* );
void updateBitset( LocalLiveRange* );
void freeAllocedRegs(LocalLiveRange*, bool);
void updateActiveList(LocalLiveRange*);
void updateBitset(LocalLiveRange*);
BitSet pregs;
unsigned int simdSize;
BitSet pregs;
unsigned int simdSize;
unsigned int globalLRSize;
unsigned int *startGRFReg;
unsigned int* startGRFReg;
unsigned int numRegLRA;
unsigned int bank1StartGRFReg;
......@@ -487,61 +486,13 @@ private:
bool doSplitLLR;
public:
LinearScan(GlobalRA& g, IR_Builder& pBuilder, std::vector<LocalLiveRange*>& localLiveIntervals,
std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>>& inputLivelIntervals, PhyRegsManager& pregMgr, PhyRegsLocalRA& pregs,
Mem_Manager& memmgr, PhyRegSummary* s, unsigned int numReg, unsigned int glrs,
bool roundRobin, bool bankConflict, bool internalConflict, bool splitLLR, unsigned int simdS)
: builder(pBuilder), mem(memmgr), pregManager(pregMgr), initPregs(pregs),
liveIntervals(localLiveIntervals), inputIntervals(inputLivelIntervals), summary(s),
pregs(pBuilder.getOptions()->getuInt32Option(vISA_TotalGRFNum) * NUM_WORDS_PER_GRF, false), simdSize(simdS),
globalLRSize(glrs), numRegLRA(numReg), useRoundRobin(roundRobin), doBankConflict(bankConflict), highInternalConflict(internalConflict), doSplitLLR(splitLLR),
gra(g)
{
//register number boundaries
bank1_start = 0;
bank1_end = SECOND_HALF_BANK_START_GRF - globalLRSize / 2 - 1;
if (useRoundRobin)
{//From middle to back
bank2_start = SECOND_HALF_BANK_START_GRF + (globalLRSize + 1) / 2;
bank2_end = numRegLRA - 1;
}
else
{ //From back to middle
bank2_start = numRegLRA - 1;
bank2_end = SECOND_HALF_BANK_START_GRF + (globalLRSize + 1) / 2;
}
//register number pointers
bank1StartGRFReg = bank1_start;
bank2StartGRFReg = bank2_start;
//register pointer
startGRFReg = &bank1StartGRFReg;
int bank1AvailableRegNum = 0;
for (int i = 0; i < SECOND_HALF_BANK_START_GRF; i++)
{
if (pregManager.getAvaialableRegs()->isGRFAvailable(i) && !pregManager.getAvaialableRegs()->isGRFBusy(i))
{
bank1AvailableRegNum++;
}
}
pregManager.getAvaialableRegs()->setBank1AvailableRegNum(bank1AvailableRegNum);
int bank2AvailableRegNum = 0;
for (unsigned int i = SECOND_HALF_BANK_START_GRF; i < numRegLRA; i++)
{
if (pregManager.getAvaialableRegs()->isGRFAvailable(i) && !pregManager.getAvaialableRegs()->isGRFBusy(i))
{
bank2AvailableRegNum++;
}
}
pregManager.getAvaialableRegs()->setBank2AvailableRegNum(bank2AvailableRegNum);
}
void run(G4_BB* bb, IR_Builder& builder, LLR_USE_MAP& LLRUseMap );
LinearScan(GlobalRA& g, std::vector<LocalLiveRange*>& localLiveIntervals,
std::list<InputLiveRange*, std_arena_based_allocator<InputLiveRange*>>& inputLivelIntervals,
PhyRegsManager& pregMgr, PhyRegsLocalRA& pregs, Mem_Manager& memmgr, PhyRegSummary* s,
unsigned int numReg, unsigned int glrs, bool roundRobin, bool bankConflict,
bool internalConflict, bool splitLLR, unsigned int simdS);
void run(G4_BB* bb, IR_Builder& builder, LLR_USE_MAP& LLRUseMap);
};
class PhyRegSummary
......
......@@ -472,9 +472,9 @@ private:
} // namespace
static unsigned getRPReductionThreshold(Options *m_options, G4_Kernel &kernel)
static unsigned getRPReductionThreshold(G4_Kernel &kernel)
{
unsigned NumGrfs = m_options->getuInt32Option(vISA_TotalGRFNum);
unsigned NumGrfs = kernel.getNumRegTotal();
float Ratio = NumGrfs / 128.0f;
// For SIMD32 kernels, use a higher threshold for rp reduction,
......@@ -486,11 +486,11 @@ static unsigned getRPReductionThreshold(Options *m_options, G4_Kernel &kernel)
return unsigned(PRESSURE_REDUCTION_THRESHOLD * Ratio);
}
static unsigned getLatencyHidingThreshold(Options *m_options)
static unsigned getLatencyHidingThreshold(G4_Kernel &kernel)
{
unsigned NumGrfs = m_options->getuInt32Option(vISA_TotalGRFNum);
unsigned NumGrfs = kernel.getNumRegTotal();
float Ratio = NumGrfs / 128.0f;
unsigned RPThreshold = m_options->getuInt32Option(vISA_preRA_ScheduleRPThreshold);
unsigned RPThreshold = kernel.getOptions()->getuInt32Option(vISA_preRA_ScheduleRPThreshold);
if (RPThreshold > 0)
{
return unsigned(RPThreshold * Ratio);
......@@ -517,7 +517,7 @@ bool preRA_Scheduler::run()
return false;
}
unsigned Threshold = getRPReductionThreshold(m_options, kernel);
unsigned Threshold = getRPReductionThreshold(kernel);
unsigned SchedCtrl = m_options->getuInt32Option(vISA_preRA_ScheduleCtrl);
LatencyTable LT(kernel.fg.builder);
......@@ -563,7 +563,7 @@ bool preRA_Scheduler::run()
if (!config.UseLatency)
return false;
if (MaxPressure >= getLatencyHidingThreshold(m_options))
if (MaxPressure >= getLatencyHidingThreshold(kernel))
return false;
// simple ROI check.
......@@ -1279,7 +1279,7 @@ void LatencyQueue::init()
// and starts a new group.
//
std::vector<unsigned> Segments;
unsigned Threshold = getLatencyHidingThreshold(ddd.getOptions());
unsigned Threshold = getLatencyHidingThreshold(ddd.getKernel());
mergeSegments(RPtrace, Max, Min, Segments, Threshold);
// Iterate segments and assign a group id to each instruction.
......@@ -1518,7 +1518,7 @@ bool BB_Scheduler::commitIfBeneficial(unsigned& MaxRPE, bool IsTopDown)
rp.recompute(getBB());
unsigned NewRPE = rp.getPressure(getBB());
unsigned LatencyPressureThreshold = getLatencyHidingThreshold(kernel.getOptions());
unsigned LatencyPressureThreshold = getLatencyHidingThreshold(kernel);
if (config.UseLatency && IsTopDown) {
// For hiding latency.
if (NewRPE <= LatencyPressureThreshold) {
......
......@@ -731,7 +731,7 @@ DDD::DDD(Mem_Manager& m, G4_BB* bb, const LatencyTable& lt, G4_Kernel* k)
Node* lastBarrier = nullptr;
HWthreadsPerEU = getBuilder()->getHWThreadNumberPerEU();
useMTLatencies = getBuilder()->useMultiThreadLatency();
totalGRFNum = getOptions()->getuInt32Option(vISA_TotalGRFNum);
totalGRFNum = kernel->getNumRegTotal();
bool BTIIsRestrict = getOptions()->getOption(vISA_ReorderDPSendToDifferentBti);
GRF_BUCKET = 0;
......
......@@ -7399,7 +7399,7 @@ public:
}
NSDS(const Options *options, G4_BB *bb) {
int totalGRFNum = options->getuInt32Option(vISA_TotalGRFNum);
int totalGRFNum = bb->getKernel().getNumRegTotal();
int TOTAL_BUCKETS = 0;
GRF_BUCKET = TOTAL_BUCKETS;
TOTAL_BUCKETS += totalGRFNum;
......@@ -7676,7 +7676,7 @@ public:
void Optimizer::countGRFUsage()
{
unsigned int maxGRFNum = builder.getOptions()->getuInt32Option(vISA_TotalGRFNum);
unsigned int maxGRFNum = kernel.getNumRegTotal();
int count = 0;
bool *GRFUse = (bool *) builder.mem.alloc(sizeof(bool) * maxGRFNum);
for (unsigned int i = 0; i < maxGRFNum; ++i)
......
......@@ -1222,7 +1222,7 @@ unsigned LiveRange::getForbiddenVectorSize()
{
case G4_GRF:
case G4_INPUT:
return gra.kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum);
return gra.kernel.getNumRegTotal();
case G4_ADDRESS:
return getNumAddrRegisters();
case G4_FLAG:
......@@ -1259,24 +1259,24 @@ unsigned int getStackCallRegSize(bool reserveStackCallRegs)
}
}
void getForbiddenGRFs(vector<unsigned int>& regNum, const Options *opt, unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned rerservedRegNum)
void getForbiddenGRFs(vector<unsigned int>& regNum, G4_Kernel &kernel, unsigned stackCallRegSize, unsigned reserveSpillSize, unsigned rerservedRegNum)
{
// Push forbidden register numbers to vector regNum
//
// r0 - Forbidden when platform is not 3d
// rMax, rMax-1, rMax-2 - Forbidden in presence of stack call sites
unsigned totalGRFNum = opt->getuInt32Option(vISA_TotalGRFNum);
unsigned totalGRFNum = kernel.getNumRegTotal();
if (opt->getTarget() != VISA_3D ||
opt->getOption(vISA_enablePreemption) ||
reserveSpillSize > 0 ||
if (kernel.getOptions()->getTarget() != VISA_3D ||
kernel.getOption(vISA_enablePreemption) ||
reserveSpillSize > 0 ||
stackCallRegSize > 0 ||
opt->getOption(vISA_ReserveR0))
kernel.getOption(vISA_ReserveR0))
{
regNum.push_back(0);
}
if (opt->getOption(vISA_enablePreemption))
if (kernel.getOption(vISA_enablePreemption))
{
// r1 is reserved for SIP kernel
regNum.push_back(1);
......@@ -1342,7 +1342,7 @@ void LiveRange::allocForbidden(Mem_Manager& mem, bool reserveStackCallRegs, unsi
{
vector<unsigned int> forbiddenGRFs;
unsigned int stackCallRegSize = getStackCallRegSize(reserveStackCallRegs);
getForbiddenGRFs(forbiddenGRFs, gra.kernel.getOptions(), stackCallRegSize, reserveSpillSize, rerservedRegNum);
getForbiddenGRFs(forbiddenGRFs, gra.kernel, stackCallRegSize, reserveSpillSize, rerservedRegNum);
for (unsigned int i = 0; i < forbiddenGRFs.size(); i++)
{
......@@ -1449,6 +1449,6 @@ PhyRegUsageParms::PhyRegUsageParms(GlobalRA& g, LiveRange* l[], G4_RegFileKind r
weakEdgeUsage = weakEdges;
maxGRFCanBeUsed = m;
rFile = r;
totalGRF = gra.kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum);
totalGRF = gra.kernel.getNumRegTotal();
lrs = l;
}
......@@ -3409,7 +3409,7 @@ void GlobalRA::verifyRA(LivenessAnalysis & liveAnalysis)
}
}
int numGRF = kernel.getOptions()->getuInt32Option(vISA_TotalGRFNum);
int numGRF = kernel.getNumRegTotal();
// Verify Live-in
std::map<uint32_t, G4_Declare*> LiveInRegMap;
std::map<uint32_t, G4_Declare*>::iterator LiveInRegMapIt;
......@@ -4000,26 +4000,15 @@ static void recordRAStats(IR_Builder& builder,
int regAlloc(IR_Builder& builder, PhyRegPool& regPool, G4_Kernel& kernel)
{
//
// if no .reg_count_total, set 128 as the default value
//
if(kernel.getNumRegTotal() == UNDEFINED_VAL)
{
kernel.setNumRegTotal(builder.getOptions()->getuInt32Option(vISA_TotalGRFNum));
}
if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())
{
if (builder.getOptions()->getuInt32Option(vISA_TotalGRFNum) < G4_DEFAULT_GRF_NUM)
{
if (kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc())
{
if (kernel.getNumRegTotal() < G4_DEFAULT_GRF_NUM)
{
MUST_BE_TRUE(false, "total GRF number <128, cannot handle stack call!");
}
}
}
}
kernel.fg.reassignBlockIDs();
//kernel.fg.markSimdBlocks();
//kernel.fg.findBackEdges();
if (kernel.getOptions()->getTarget() == VISA_3D)
{
......
......@@ -211,9 +211,13 @@ namespace vISA
Rematerialization(G4_Kernel& k, LivenessAnalysis& l, GraphColor& c, RPE& r) :
kernel(k), liveness(l), coloring(c), doms(k.fg), rpe(r)
{
unsigned int numGRFs = k.getOptions()->getuInt32Option(vISA_TotalGRFNum);
rematLoopRegPressure = numGRFs - (128 - cRematLoopRegPressure128GRF);
rematRegPressure = numGRFs - (128 - cRematRegPressure128GRF);
unsigned numGRFs = k.getNumRegTotal();
auto scale = [=](unsigned threshold) -> unsigned {
float ratio = 1.0f - (128 - threshold) / 128.0f;
return static_cast<unsigned>(numGRFs * ratio);
};
rematLoopRegPressure = scale(cRematLoopRegPressure128GRF);
rematRegPressure = scale(cRematRegPressure128GRF);
rematCandidates.resize(l.getNumSelectedVar(), false);
......
......@@ -106,9 +106,13 @@ namespace vISA
SpillManagerGMRF& s, unsigned int iterationNo, RPE& r) :
kernel(k), liveness(l), graphColor(g), spill(s), rpe(r)
{
unsigned int numGRFs = k.getOptions()->getuInt32Option(vISA_TotalGRFNum);
fillWindowSizeThreshold = numGRFs - (128 - cFillWindowThreshold128GRF);
spillWindowSizeThreshold = numGRFs - (128 - cSpillWindowThreshold128GRF);
unsigned int numGRFs = k.getNumRegTotal();
auto scale = [=](unsigned threshold) -> unsigned {
float ratio = 1.0f - (128 - threshold) / 128.0f;
return static_cast<unsigned>(numGRFs * ratio);
};
fillWindowSizeThreshold = scale(cFillWindowThreshold128GRF);
spillWindowSizeThreshold = scale(cSpillWindowThreshold128GRF);
iterationNo = iterNo;
......
......@@ -124,65 +124,75 @@ static void setNewDclAlignment(G4_Declare* newDcl, G4_Align origAlign)
// Constructor
SpillManagerGMRF::SpillManagerGMRF (
GlobalRA& g,
unsigned spillAreaOffset,
unsigned varIdCount,
const LivenessAnalysis * lvInfo,
LiveRange ** lrInfo,