Commit 48c12f5c authored by Buqi Cheng, committed by Wesierski, Lukasz

Synchronisation between branches

Change-Id: I5ca186b14ff128baaed9c0df08ae0611a064d6ff
parent 95bf0d08
......@@ -3515,7 +3515,7 @@ void CEncoder::InitEncoder( bool canAbortOnSpill, bool hasStackCall )
{
vbuilder->SetOption(vISA_DumpCompilerStats, true);
}
if (context->type == ShaderType::OPENCL_SHADER && context->m_floatDenormMode32 == FLOAT_DENORM_RETAIN &&
context->m_floatDenormMode64 == FLOAT_DENORM_RETAIN)
{
......@@ -4799,7 +4799,8 @@ void CEncoder::Gather4ScaledNd(CVariable *dst,
ISA_GATHER4_SCALED,
predOpnd,
GetAluEMask(dst),
visaExecSize(m_encoderState.m_simdSize),
visaExecSize(offset->IsUniform() ? lanesToSIMDMode(offset->GetNumberElement()) :
m_encoderState.m_simdSize),
ConvertChannelMaskToVisaType(BIT(nd) - 1),
surfaceOpnd,
globalOffsetOpnd,
......@@ -4811,14 +4812,26 @@ void CEncoder::Gather4Scaled(CVariable *dst,
const ResourceDescriptor& resource,
CVariable *offset) {
unsigned nd = dst->GetSize();
switch (m_encoderState.m_simdSize) {
default: assert(false && "Unknown SIMD size!"); return;
case SIMDMode::SIMD8:
nd = nd / (SIZE_GRF * 1);
break;
case SIMDMode::SIMD16:
nd = nd / (SIZE_GRF * 2);
break;
if (dst->IsUniform())
{
if (nd > SIZE_GRF)
{
assert(false && "Unknown DstSize!");
return;
}
nd = 1;
}
else
{
switch (m_encoderState.m_simdSize) {
default: assert(false && "Unknown SIMD size!"); return;
case SIMDMode::SIMD8:
nd = nd / (SIZE_GRF * 1);
break;
case SIMDMode::SIMD16:
nd = nd / (SIZE_GRF * 2);
break;
}
}
Gather4ScaledNd(dst, resource, offset, nd);
}
......@@ -4827,14 +4840,26 @@ void CEncoder::Scatter4Scaled(CVariable *src,
const ResourceDescriptor& resource,
CVariable *offset) {
unsigned nd = src->GetSize();
switch (m_encoderState.m_simdSize) {
default: assert(false && "Unknown SIMD size!"); return;
case SIMDMode::SIMD8:
nd = nd / (SIZE_GRF * 1);
break;
case SIMDMode::SIMD16:
nd = nd / (SIZE_GRF * 2);
break;
if (src->IsUniform())
{
if (nd > SIZE_GRF)
{
assert(false && "Unknown SrcSize!");
return;
}
nd = 1;
}
else
{
switch (m_encoderState.m_simdSize) {
default: assert(false && "Unknown SIMD size!"); return;
case SIMDMode::SIMD8:
nd = nd / (SIZE_GRF * 1);
break;
case SIMDMode::SIMD16:
nd = nd / (SIZE_GRF * 2);
break;
}
}
VISA_StateOpndHandle* surfaceOpnd = GetVISASurfaceOpnd(resource);
......@@ -4850,7 +4875,8 @@ void CEncoder::Scatter4Scaled(CVariable *src,
ISA_SCATTER4_SCALED,
predOpnd,
GetAluEMask(src),
visaExecSize(m_encoderState.m_simdSize),
visaExecSize(offset->IsUniform() ? lanesToSIMDMode(offset->GetNumberElement()) :
m_encoderState.m_simdSize),
ConvertChannelMaskToVisaType(BIT(nd) - 1),
surfaceOpnd,
globalOffsetOpnd,
......@@ -5333,6 +5359,4 @@ void CEncoder::Lifetime(VISAVarLifetime StartOrEnd, CVariable* dst)
V(vKernel->AppendVISALifetime(StartOrEnd, srcOpnd));
}
}
......@@ -112,40 +112,27 @@ DeSSA::DeSSA() : FunctionPass( ID )
// Debug dump of the DeSSA state: emits a "Phi-Var Isolations" banner on OS and
// then walks every (value, node) entry of RegNodeMap.
//
// NOTE(review): this listing looks like a rendered commit diff whose +/-
// markers were stripped, so two versions of the loop body are interleaved
// below (e.g. two declarations of `Value *VL`, and a bare `else {` next to
// `} else {`). Presumably one version groups nodes by leader and the other
// prints each value with its register root; reconstruct the intended version
// from the repository history before relying on this text.
//
// NOTE(review): values are printed through IGC::Debug::ods() while the labels
// and separators are written to OS; the pieces only line up if both refer to
// the same stream — confirm.
void DeSSA::print(raw_ostream &OS, const Module* ) const
{
Banner(OS, "Phi-Var Isolations");
DenseMap<Node*, int> LeaderVisited;
for (auto I = RegNodeMap.begin(),
E = RegNodeMap.end(); I != E; ++I) {
Node* N = I->second;
// We don't want to change behavior of DeSSA by invoking
// dumping/printing functions. Thus, don't use getLeader()
// as it has side-effect (doing path halving).
Node* Leader = N->parent.getPointer();
while (Leader != Leader->parent.getPointer()) {
Leader = Leader->parent.getPointer();
}
if (LeaderVisited.count(Leader)) {
continue;
Value *VL = I->first;
Value *RootV = getRegRoot(VL);
if (RootV) {
VL->print(IGC::Debug::ods());
OS << " : ";
RootV->print(IGC::Debug::ods());
}
LeaderVisited[Leader] = 1;
Value *VL;
if (isIsolated(N)) {
VL = N->value;
else {
OS << "Var isolated : ";
VL->print(IGC::Debug::ods());
OS << "\n";
} else {
OS << "Leader : ";
Leader->value->print(IGC::Debug::ods());
OS << "\n";
N = Leader->next;
while (N != Leader) {
VL = N->value;
OS << " ";
}
PHINode *PHI = dyn_cast<PHINode>(VL);
if (PHI) {
if (isPHIIsolated(PHI)) {
OS << "\nPHI isolated : ";
VL->print(IGC::Debug::ods());
OS << "\n";
N = N->next;
}
}
OS << "\n";
}
}
......@@ -161,7 +148,7 @@ bool DeSSA::runOnFunction(Function &MF)
{
return false;
}
auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
CTX = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
WIA = &getAnalysis<WIAnalysis>();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
......@@ -255,7 +242,7 @@ bool DeSSA::runOnFunction(Function &MF)
break;
}
e_alignment DefAlign = GetPreferredAlignment(PHI, WIA, pCtx);
e_alignment DefAlign = GetPreferredAlignment(PHI, WIA, CTX);
assert(PHI == getInsEltRoot(PHI));
addReg(PHI, DefAlign);
PHISrcDefs[&(*I)].push_back(PHI);
......@@ -273,7 +260,7 @@ bool DeSSA::runOnFunction(Function &MF)
PHILoopPreHeaderSrcs.count(OrigSrcVal) > 0 &&
PHILoopPreHeaderSrcs[OrigSrcVal] >= PHI_SRC_USE_THRESHOLD);
// add src to the union
e_alignment SrcAlign = GetPreferredAlignment(OrigSrcVal, WIA, pCtx);
e_alignment SrcAlign = GetPreferredAlignment(OrigSrcVal, WIA, CTX);
Value *SrcVal = getInsEltRoot(OrigSrcVal);
Instruction *DefMI = dyn_cast<Instruction>(SrcVal);
......@@ -301,7 +288,7 @@ bool DeSSA::runOnFunction(Function &MF)
// isolate complex type that IGC does not handle
if (PHI->getType()->isStructTy() ||
PHI->getType()->isArrayTy()) {
isolateReg(PHI);
isolatePHI(PHI);
}
}
}
......@@ -358,7 +345,7 @@ Value* DeSSA::getRegRoot(Value* Val, e_alignment *pAlign) const {
if (RI == RegNodeMap.end())
return 0;
Node *TheNode = RI->second;
if (isIsolated(TheNode))
if (TheNode->parent.getInt() & Node::kRegisterIsolatedFlag)
return 0x0;
Node *TheLeader = TheNode->getLeader();
if (pAlign)
......@@ -372,7 +359,8 @@ int DeSSA::getRootColor(Value* V)
if (RI == RegNodeMap.end())
return 0;
Node *TheNode = RI->second;
if (isIsolated(TheNode))
if (TheNode->parent.getInt() &
(Node::kRegisterIsolatedFlag | Node::kPHIIsolatedFlag))
return 0;
Node *TheLeader = TheNode->getLeader();
return TheLeader->color;
......@@ -414,7 +402,7 @@ void DeSSA::MapUnionRegs(MapVector<Value*, Node*> &Map, Value* Val1, Value* Val2
// Marks the node that RegNodeMap holds for Val as isolated by OR-ing a flag
// bit into the integer stored alongside the node's parent pointer.
//
// NOTE(review): the two setInt() lines are presumably the before/after forms
// of a single statement in a diff whose +/- markers were stripped
// (kPHIIsolatedFlag replaced by kRegisterIsolatedFlag). Read literally, both
// flags would be set — confirm the intended version against the repository.
// NOTE(review): the node is fetched with operator[] and dereferenced without a
// check; presumably callers only pass values already registered — verify.
void DeSSA::isolateReg(Value* Val) {
Node *Node = RegNodeMap[Val];
Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
}
Value* DeSSA::getOrigRoot(Instruction *PHI) const {
......@@ -430,7 +418,9 @@ Value* DeSSA::getPHIRoot(Instruction *PHI) const {
auto RI = RegNodeMap.find(PHI);
assert (RI != RegNodeMap.end());
Node *DestNode = RI->second;
if (isIsolated(DestNode))
if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
return 0x0;
if (DestNode->parent.getInt() & Node::kRegisterIsolatedFlag)
return 0x0;
return DestNode->getLeader()->value;
}
......@@ -443,14 +433,11 @@ void DeSSA::isolatePHI(Instruction *PHI) {
// Reports whether the given PHI is isolated, based on the node RegNodeMap
// holds for it.
//
// NOTE(review): this text interleaves two versions of the body (diff +/-
// markers appear to have been stripped):
//   - one returns true when PHI has no entry in RegNodeMap and otherwise
//     returns isIsolated(DestNode);
//   - the other asserts that the entry exists and tests kPHIIsolatedFlag in
//     the integer stored with the node's parent pointer.
// Read literally, the second return statement is unreachable. Confirm which
// version is current before relying on the missing-entry behavior.
bool DeSSA::isPHIIsolated(Instruction *PHI) const {
auto RI = RegNodeMap.find(PHI);
if (RI == RegNodeMap.end()) {
return true;
}
assert (RI != RegNodeMap.end());
Node *DestNode = RI->second;
return isIsolated(DestNode);
return ((DestNode->parent.getInt() & Node::kPHIIsolatedFlag) > 0 ? true : false);
}
/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
/// interferences found between registers in the same congruence class. It
/// takes two DenseMaps as arguments that it also updates:
......@@ -524,7 +511,7 @@ DeSSA::SplitInterferencesForBasicBlock(
// Pop registers from the stack represented by ImmediateDominatingParent
// until we find a parent that dominates the current instruction.
while (NewParent) {
if (getRegRoot(NewParent)) {
if (getRootColor(NewParent)) {
// we have added the another condition because the domination-test
// does not work between two phi-node. See the following comments
// from the DT::dominates:
......@@ -574,8 +561,24 @@ DeSSA::SplitInterferencesForBasicBlock(
if (!PHI) {
break;
}
// skip phi-isolated
if (isPHIIsolated(PHI)) {
int RootC = getRootColor(PHI);
// check live-out interference
if (IGC_IS_FLAG_ENABLED(EnableDeSSAWA) && !RootC)
{
// [todo] delete this code
if (CTX->type == ShaderType::COMPUTE_SHADER)
{
for (unsigned i = 0; !RootC && i < PHI->getNumOperands(); i++) {
Value* SrcVal = PHI->getOperand(i);
if (!isa<Constant>(SrcVal)) {
RootC = getRootColor(SrcVal);
}
}
}
}
if (!RootC) {
continue;
}
// Find the index of the PHI operand that corresponds to this basic block.
......@@ -588,14 +591,14 @@ DeSSA::SplitInterferencesForBasicBlock(
Value* PredValue = PHI->getOperand(PredIndex);
PredValue = getInsEltRoot(PredValue);
// check potential cyclic phi-move dependency
Value *OrigRootV = getOrigRoot(PHI);
std::pair<Instruction*, Value*> &CurrentPHI = CurrentPHIForColor[OrigRootV];
//Value *OrigRootV = getOrigRoot(PHI);
std::pair<Instruction*, Value*> &CurrentPHI = CurrentPHIForColor[RootC];
// If two PHIs have the same operand from every shared predecessor, then
// they don't actually interfere. Otherwise, isolate the current PHI. This
// could possibly be improved, e.g. we could isolate the PHI with the
// fewest operands.
if (CurrentPHI.first && CurrentPHI.second != PredValue) {
isolateReg(PHI);
isolatePHI(PHI);
continue;
}
else {
......@@ -603,26 +606,25 @@ DeSSA::SplitInterferencesForBasicBlock(
}
// check live-out interference
int RootC = getRootColor(PHI);
#if 0
Value *RootV = getRegRoot(PHI);
for (unsigned i = 0; !RootV && i < PHI->getNumOperands(); i++) {
Value* SrcVal = PHI->getOperand(i);
if (!isa<Constant>(SrcVal)) {
Value *SrcRootV = getRegRoot(SrcVal);
if (SrcRootV && SrcRootV == OrigRootV) {
RootV = SrcRootV;
Value* SrcVal = PHI->getOperand(i);
if (!isa<Constant>(SrcVal)) {
Value *SrcRootV = getRegRoot(SrcVal);
if (SrcRootV && SrcRootV == OrigRootV) {
RootV = SrcRootV;
printf("JGU_DEBUG: enter this code!\n");
}
}
}
}
#endif
if (!RootC)
continue;
// Pop registers from the stack represented by ImmediateDominatingParent
// until we find a parent that dominates the current instruction.
Value *NewParent = CurrentDominatingParent[RootC];
while (NewParent) {
if (getRegRoot(NewParent)) {
if (getRootColor(NewParent)) {
if (isa<Argument>(NewParent)) {
break;
} else if (DT->dominates(cast<Instruction>(NewParent)->getParent(), MBB)) {
......@@ -685,7 +687,8 @@ void DeSSA::SplitInterferencesForAlignment()
N = Curr->next;
// Skip isolated reg.
if (isIsolated(Curr)) {
if (Curr->parent.getInt() &
(Node::kRegisterIsolatedFlag | Node::kPHIIsolatedFlag)) {
continue;
}
......@@ -709,7 +712,8 @@ void DeSSA::SplitInterferencesForAlignment()
N = N->next;
// Skip isolated reg.
if (isIsolated(Curr)) {
if (Curr->parent.getInt() &
(Node::kRegisterIsolatedFlag | Node::kPHIIsolatedFlag)) {
continue;
}
......@@ -828,7 +832,8 @@ void DeSSA::getAllValuesInCongruentClass(
Node* First = RI->second;
Node* N = First->next;
do {
if (isIsolated(N)) {
if (N->parent.getInt() &
(Node::kPHIIsolatedFlag | Node::kRegisterIsolatedFlag)) {
N = N->next;
continue;
}
......
......@@ -200,9 +200,6 @@ class DeSSA : public llvm::FunctionPass {
/// Isolate a PHI.
void isolatePHI(llvm::Instruction*);
/// Is it isolated (single-valued congruent class)
bool isIsolated(Node* N) const { return (N == N->next); }
/// Traverses a basic block, splitting any interferences found between
/// registers in the same congruence class. It takes two DenseMaps as
/// arguments that it also updates: CurrentDominatingParent, which maps
......@@ -230,6 +227,7 @@ class DeSSA : public llvm::FunctionPass {
llvm::LoopInfo *LI;
CodeGenPatternMatch *CG;
const llvm::DataLayout *DL;
CodeGenContext* CTX;
llvm::BumpPtrAllocator Allocator;
// Color (label) assigned to each congruent class
......@@ -249,7 +247,7 @@ public:
// Maps a color to a pair of a llvm::Instruction* and a virtual register, which
// is the operand of that PHI corresponding to the current basic block.
llvm::DenseMap<llvm::Value*, std::pair<llvm::Instruction*, llvm::Value*> > CurrentPHIForColor;
llvm::DenseMap<int, std::pair<llvm::Instruction*, llvm::Value*> > CurrentPHIForColor;
// Implement reuse for InsertElement only
// Hierarchical coalescing:
......
......@@ -10281,9 +10281,9 @@ void EmitPass::emitPreOrPostFixOpScalar(
if (isPrefix)
{
// For case where we need the prefix shift the source by 1 lane
if (i == 0)
{
{
// (W) mov (1) result[0] identity
m_encoder->Copy(result[i], pIdentityValue);
}
......
......@@ -228,8 +228,7 @@ bool VectorProcess::reLayoutLoadStore(Instruction* Inst)
{
Ty = LI->getType();
}
else
if (SI)
else if (SI)
{
Ty = SI->getOperand(0)->getType();
}
......
......@@ -67,6 +67,7 @@ DECLARE_IGC_REGKEY(DWORD, LoopSinkMinSave, 5, "If loop sink can ha
DECLARE_IGC_REGKEY(DWORD, LoopSinkThresholdDelta, 50, "Do loop sink If the estimated register pressure is higher than this + #avaialble registers")
DECLARE_IGC_REGKEY(bool, DisableCodeHoisting, false, "Setting this to 1/true adds a compiler switch to disable code-hoisting")
DECLARE_IGC_REGKEY(bool, DisableDeSSA, false, "Setting this to 1/true adds a compiler switch to disable optimized De-SSA")
DECLARE_IGC_REGKEY(bool, EnableDeSSAWA, true, "[tmp]Keep some piece of code to avoid perf regression")
DECLARE_IGC_REGKEY(bool, DisablePayloadCoalescing, false, "Setting this to 1/true adds a compiler switch to disable payload coalescing optimization for all types")
DECLARE_IGC_REGKEY(bool, DisablePayloadCoalescing_RT, false, "Setting this to 1/true adds a compiler switch to disable payload coalescing optimization for RT only")
DECLARE_IGC_REGKEY(bool, DisablePayloadCoalescing_Sample, false, "Setting this to 1/true adds a compiler switch to disable payload coalescing optimization for Samplers only")
......
......@@ -5465,10 +5465,19 @@ int IR_Builder::translateGather4Inst(G4_Predicate *pred,
startTimer(TIMER_VISA_BUILDER_IR_CONSTRUCTION);
#endif
ASSERT_USER(execSize == EXEC_SIZE_8 || execSize == EXEC_SIZE_16,
"Only support SIMD8 or SIMD16!");
ASSERT_USER(execSize == EXEC_SIZE_1 || execSize == EXEC_SIZE_2 ||
execSize == EXEC_SIZE_4 || execSize == EXEC_SIZE_8 ||
execSize == EXEC_SIZE_16,
"Only support SIMD1, SIMD2, SIMD4, SIMD8 or SIMD16!");
Common_ISA_Exec_Size instExecSize = execSize;
if (execSize == EXEC_SIZE_1 || execSize == EXEC_SIZE_2 ||
execSize == EXEC_SIZE_4) {
execSize = EXEC_SIZE_8;
}
unsigned exSize = Get_Common_ISA_Exec_Size(execSize);
unsigned instExSize = Get_Common_ISA_Exec_Size(instExecSize);
unsigned instOpt = Get_Gen4_Emask(eMask, exSize);
bool useSplitSend = useSends();
......@@ -5479,7 +5488,7 @@ int IR_Builder::translateGather4Inst(G4_Predicate *pred,
if (!globalOffset->isImm() || globalOffset->asImm()->getImm() != 0) {
G4_Declare *dcl = Create_MRF_Dcl(exSize, offsets->getType());
G4_DstRegRegion *tmp = Create_Dst_Opnd_From_Dcl(dcl, 1);
createInst(pred, G4_add, 0, false, exSize, tmp, offsets, globalOffset, instOpt);
createInst(pred, G4_add, 0, false, instExSize, tmp, offsets, globalOffset, instOpt);
offsets = Create_Src_Opnd_From_Dcl(dcl, getRegionStride1());
}
......@@ -5525,7 +5534,7 @@ int IR_Builder::translateGather4Inst(G4_Predicate *pred,
Create_Send_Inst_For_CISA(pred, dst,
msgs[0], sizes[0],
resLen,
exSize,
instExSize,
MD, sfid,
false, useHeader,
true, false,
......@@ -5535,7 +5544,7 @@ int IR_Builder::translateGather4Inst(G4_Predicate *pred,
Create_SplitSend_Inst_For_CISA(pred, dst,
msgs[0], sizes[0], msgs[1], sizes[1],
resLen,
exSize,
instExSize,
MD, 0, sfid,
false, useHeader,
true, false,
......@@ -5561,10 +5570,19 @@ int IR_Builder::translateScatter4Inst(G4_Predicate *pred,
startTimer(TIMER_VISA_BUILDER_IR_CONSTRUCTION);
#endif
ASSERT_USER(execSize == EXEC_SIZE_8 || execSize == EXEC_SIZE_16,
"Only support SIMD8 or SIMD16!");
ASSERT_USER(execSize == EXEC_SIZE_1 || execSize == EXEC_SIZE_2 ||
execSize == EXEC_SIZE_4 || execSize == EXEC_SIZE_8 ||
execSize == EXEC_SIZE_16,
"Only support SIMD1, SIMD2, SIMD4, SIMD8 or SIMD16!");
Common_ISA_Exec_Size instExecSize = execSize;
if (execSize == EXEC_SIZE_1 || execSize == EXEC_SIZE_2 ||
execSize == EXEC_SIZE_4) {
execSize = EXEC_SIZE_8;
}
unsigned exSize = Get_Common_ISA_Exec_Size(execSize);
unsigned instExSize = Get_Common_ISA_Exec_Size(instExecSize);
unsigned instOpt = Get_Gen4_Emask(eMask, exSize);
bool useSplitSend = useSends();
......@@ -5575,7 +5593,7 @@ int IR_Builder::translateScatter4Inst(G4_Predicate *pred,
if (!globalOffset->isImm() || globalOffset->asImm()->getImm() != 0) {
G4_Declare *dcl = Create_MRF_Dcl(exSize, offsets->getType());
G4_DstRegRegion *tmp = Create_Dst_Opnd_From_Dcl(dcl, 1);
createInst(pred, G4_add, 0, false, exSize, tmp, offsets, globalOffset, instOpt);
createInst(pred, G4_add, 0, false, instExSize, tmp, offsets, globalOffset, instOpt);
offsets = Create_Src_Opnd_From_Dcl(dcl, getRegionStride1());
}
......@@ -5624,7 +5642,7 @@ int IR_Builder::translateScatter4Inst(G4_Predicate *pred,
Create_Send_Inst_For_CISA(pred, dst,
msgs[0], sizes[0],
0,
exSize,
instExSize,
MD, sfid,
false, useHeader,
false, true,
......@@ -5634,7 +5652,7 @@ int IR_Builder::translateScatter4Inst(G4_Predicate *pred,
Create_SplitSend_Inst_For_CISA(pred, dst,
msgs[0], sizes[0], msgs[1], sizes[1],
0,
exSize,
instExSize,
MD, 0, sfid,
false, useHeader,
false, true,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment