Commit 35e635c9 authored by Junjie Gu's avatar Junjie Gu

First step toward handling generic aliasing, in order to get rid of as many redundant movs as possible

Change-Id: I2f75ca4784fa1b2daad0f522d6a27d296773213f
parent dc22773a
This diff is collapsed.
/*===================== begin_copyright_notice ==================================
Copyright (c) 2017 Intel Corporation
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
======================= end_copyright_notice ==================================*/
#pragma once
namespace llvm
{
// Forward declaration: only a pointer to FunctionPass is used below, so the
// full class definition is not needed in this header.
class FunctionPass;
}
// Creates the AddCopyIntrinsic pass and returns it as an llvm::FunctionPass.
// NOTE(review): presumably the caller (e.g. the pass manager it is added to)
// takes ownership of the returned object -- confirm.
llvm::FunctionPass *createAddCopyIntrinsicPass();
......@@ -104,8 +104,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "Compiler/MetaDataUtilsWrapper.h"
#include "Compiler/SPIRMetaDataTranslation.h"
#include "Compiler/Optimizer/OpenCLPasses/ErrorCheckPass.h"
#include "AdaptorCommon/AddCopyIntrinsic.hpp"
#include "Compiler/MetaDataApi/IGCMetaDataDefs.h"
#include "Compiler/MetaDataApi/IGCMetaDataHelper.h"
#include "Compiler/CodeGenContextWrapper.hpp"
......@@ -377,6 +376,9 @@ static void CommonOCLBasedPasses(
mpm.add(new ResourceAllocator());
mpm.add(new SubGroupFuncsResolution());
// This is an experimental pass!
mpm.add(createAddCopyIntrinsicPass());
// Run InlineLocals and GenericAddressDynamic together
mpm.add(new InlineLocalsResolution());
......
......@@ -2085,7 +2085,7 @@ CVariable *CShader::GetSymbolFromSource(Instruction *UseInst,
if (!DefInst || GetIsUniform(DefInst))
continue;
if (m_deSSA && m_deSSA->getRootValue(DefInst))
if (!IsSimpleVariable(DefInst))
{
continue;
}
......@@ -2106,7 +2106,7 @@ CVariable *CShader::GetSymbolFromSource(Instruction *UseInst,
if (!DefInst)
return nullptr;
if (m_deSSA && m_deSSA->getRootValue(DefInst))
if (!IsSimpleVariable(DefInst))
{
return nullptr;
}
......@@ -2470,6 +2470,7 @@ bool CShader::CanTreatAsAlias(llvm::ExtractElementInst *inst)
{
return false;
}
if (m_deSSA)
{
if (m_deSSA->getRootValue(inst))
......@@ -2483,6 +2484,7 @@ bool CShader::CanTreatAsAlias(llvm::ExtractElementInst *inst)
}
}
for (auto I = vecSrc->user_begin(), E = vecSrc->user_end(); I != E; ++I)
{
llvm::ExtractElementInst* extract = llvm::dyn_cast<llvm::ExtractElementInst>(*I);
......@@ -2583,6 +2585,20 @@ bool CShader::CanTreatScalarSourceAsAlias(llvm::InsertElementInst *IEI) {
return true;
}
// Returns true if `inst` is recorded in the variable-reuse analysis' set of
// instructions that became no-ops due to aliasing, i.e. instructions for
// which no code needs to be emitted.
//
// Returns false when no variable-reuse analysis is attached (m_VRA is null),
// consistent with the null checks performed in IsSimpleVariable().
bool CShader::HasBecomeNoop(Instruction *inst) {
    if (!m_VRA)
    {
        // Without the analysis nothing can have been turned into a no-op.
        return false;
    }
    // count() yields 0 or 1; compare explicitly instead of relying on an
    // implicit integer-to-bool conversion.
    return m_VRA->m_HasBecomeNoopInsts.count(inst) != 0;
}
// A value is "simple" when it is not aliased to another variable, is not in
// a DeSSA congruent class, and is not payload-coalesced. Each check is
// skipped when the corresponding analysis is not available.
bool CShader::IsSimpleVariable(Value* V) {
    // Aliased (as aliaser or aliasee) according to variable-reuse analysis.
    if (m_VRA && m_VRA->isAliasedValue(V))
        return false;

    // Has a root value in DeSSA.
    if (m_deSSA && m_deSSA->getRootValue(V))
        return false;

    // Has a CC tuple mapping in the coalescing engine.
    if (m_coalescingEngine && m_coalescingEngine->GetValueCCTupleMapping(V))
        return false;

    return true;
}
#define SET_INTRINSICS() \
GenISAIntrinsic::GenISA_setMessagePhaseX: \
case GenISAIntrinsic::GenISA_setMessagePhaseXV: \
......
......@@ -4822,7 +4822,6 @@ void EmitPass::emitSimdBlockWrite( llvm::Instruction* inst, llvm::Value* ptrVal
bytesToRead = getBlockMsgSize(bytesRemaining, canDo256Byte);
bytesRemaining -= bytesToRead;
m_encoder->OWStore( data, resource.m_surfaceType, resource.m_resource, src0shifted, bytesToRead, srcOffset );
srcOffset = srcOffset + bytesToRead;
m_encoder->Push();
......@@ -7354,6 +7353,10 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
case GenISAIntrinsic::GenISA_dp4a_us:
emitDP4A(inst);
break;
case GenISAIntrinsic::GenISA_Copy:
{
    emitGenISACopy(inst);
    // End this case explicitly rather than relying on an implicit
    // fall-through into whatever case label happens to follow.
    break;
}
case GenISAIntrinsic::GenISA_evaluateSampler:
// nothing to do
break;
......@@ -8011,6 +8014,14 @@ void EmitPass::emitLoad(LoadInst* inst)
void EmitPass::EmitNoModifier(llvm::Instruction* inst)
{
// This is a single instruction pattern emitter
// Check if this inst has been turned into noop due to alias.
// If so, no code shall be emitted for this instruction.
if (m_currShader->HasBecomeNoop(inst))
{
return;
}
switch(inst->getOpcode())
{
case Instruction::Ret:
......@@ -13056,6 +13067,15 @@ void EmitPass::emitVectorCopy(CVariable *Dst, CVariable *Src, uint32_t nElts,
}
}
// Emit code for the GenISA_Copy intrinsic: the symbol of the intrinsic's
// first argument is copied into the current destination variable, using the
// intrinsic's result type to drive the copy.
void EmitPass::emitGenISACopy(GenIntrinsicInst *GenCopyInst)
{
    Type *copyTy = GenCopyInst->getType();
    CVariable *dstVar = m_destination;
    CVariable *srcVar = GetSymbol(GenCopyInst->getArgOperand(0));
    emitCopyAll(dstVar, srcVar, copyTy);
}
/// \brief Emulate the 64-bit addition of uniform `Src` with vector immediate
/// value `Imm`. `Imm` should be in type of Type_UV.
void EmitPass::emitAddPairWithImm(CVariable *Dst, CVariable *Src,
......
......@@ -349,6 +349,7 @@ public:
void emitVectorBitCast(llvm::BitCastInst *BCI);
void emitVectorLoad(llvm::LoadInst *LI, llvm::Value* offset);
void emitVectorStore(llvm::StoreInst *SI);
void emitGenISACopy(llvm::GenIntrinsicInst *GenCopyInst);
void emitVectorCopy(CVariable *Dst, CVariable *Src, uint32_t nElts,
uint32_t DstSubRegOffset = 0, uint32_t SrcSubRegOffset = 0);
void emitCopyAll(CVariable *Dst, CVariable *Src, llvm::Type *Ty);
......
......@@ -217,6 +217,13 @@ public:
bool CanTreatAsAlias(llvm::ExtractElementInst *inst);
bool CanTreatScalarSourceAsAlias(llvm::InsertElementInst *);
bool HasBecomeNoop(llvm::Instruction *inst);
// If V is not in any congruent class, not aliased to any other
// variables, not payload-coalesced, it is a simple variable
// and this function returns true.
bool IsSimpleVariable(llvm::Value* V);
bool VMECoalescePattern(llvm::GenIntrinsicInst*);
bool isUnpacked(llvm::Value* value);
......
......@@ -32,6 +32,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "common/LLVMWarningsPush.hpp"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/InstIterator.h>
......@@ -46,12 +47,23 @@ namespace IGC {
struct SSubVector
{
// Denotes a sub-vector of BaseVector (a value of vector type) that starts
// at element StartElementOffset.
//
// It is used as an aliasee in the pair <Value, SSubVector>, thus the
// size of the sub-vector is the size of Value (aliaser) of this pair.
// (If needed, add the number of elements in SSubVector.)
llvm::Value* BaseVector;
short StartElementOffset;
};
// One element of a vector: the element at index EltIx of the vector Vec.
// A default-constructed object has Vec == nullptr and EltIx == -1.
struct SVecElement {
    llvm::Value* Vec = nullptr;
    int EltIx = -1;

    SVecElement() {}
};
/// RPE based analysis for querying variable reuse status.
///
/// Let two instructions DInst and UInst be defined in the same basic block,
......@@ -82,6 +94,9 @@ public:
~VariableReuseAnalysis() {}
typedef llvm::DenseMap<llvm::Value*, SSubVector> ValueAliasMapTy;
typedef llvm::DenseMap<llvm::Value*, llvm::TinyPtrVector<llvm::Value*> > AliasRootMapTy;
typedef llvm::SmallVector<SVecElement, 32> VecEltTy;
typedef llvm::SmallVector<llvm::Value*, 32> ValueVectorTy;
virtual bool runOnFunction(llvm::Function &F) override;
......@@ -143,18 +158,30 @@ public:
bool isLocalValue(llvm::Value* V);
bool aliasHasInterference(llvm::Value* Aliaser, llvm::Value* Aliasee);
bool hasInterference(llvm::Value* V0, llvm::Value* V1);
// Visitor
void visitCallInst(llvm::CallInst& I);
void visitCastInst(llvm::CastInst& I);
void visitInsertElementInst(llvm::InsertElementInst& I);
void visitExtractElementInst(llvm::ExtractElementInst& I);
// True if V takes part in an alias relation on either side, i.e. it is an
// aliaser or an aliasee.
bool isAliasedValue(llvm::Value *V) {
    if (isAliaser(V))
    {
        return true;
    }
    return isAliasee(V);
}
bool isAliaser(llvm::Value* V);
bool isAliasee(llvm::Value* V);
int getCongruentClassSize(llvm::Value* V);
bool isSameSizeValue(llvm::Value* V0, llvm::Value* V1);
// Collect aliases from subVector to base vector. The map's key is
// assumed to be an independent value (not in any congruent class)
// originally. Of course, after aliasing, it must be in a congruent
// class.
ValueAliasMapTy m_ValueAliasMap;
// Collect aliases from subVector to base vector.
ValueAliasMapTy m_ValueAliasMap; // aliaser -> aliasee
// Reverse of m_ValueAliasMap.
AliasRootMapTy m_AliasRootMap; // aliasee -> all its aliasers.
// No need to emit code for instructions in this map due to aliasing
llvm::DenseMap <llvm::Instruction*, int > m_HasBecomeNoopInsts;
private:
void reset() {
......@@ -162,6 +189,8 @@ private:
m_IsFunctionPressureLow = Status::Undef;
m_IsBlockPressureLow = Status::Undef;
m_ValueAliasMap.clear();
m_AliasRootMap.clear();
m_HasBecomeNoopInsts.clear();
}
// Initialize per-block states. In particular, check if the entire block has a
......@@ -193,12 +222,47 @@ private:
return (m_coalescingEngine->GetValueCCTupleMapping(V) != nullptr);
}
void mergeVariables(llvm::Function *F);
// Add entry to alias map.
bool addAlias(
llvm::Value* Aliaser,
SSubVector& SVD);
// Returns true for the following pattern:
// a = extractElement <vectorType> EEI_Vec, <constant EEI_ix>
// b = insertElement <vectorType> V1, a, <constant IEI_ix>
// where EEI_ix and IEI_ix are constants; Return false otherwise.
bool getVectorIndicesIfConstant(
llvm::InsertElementInst* IEI,
int& IEI_ix,
llvm::Value*& EEI_Vec,
int&EEI_ix);
bool checkAndGetAllInsertElements(
llvm::InsertElementInst* FirstIEI,
ValueVectorTy& AllIEIs,
VecEltTy& AllElts);
bool IsExtractFrom(
VecEltTy& AllElts,
llvm::InsertElementInst* FirstIEI,
llvm::InsertElementInst* LastIEI,
SSubVector& SV);
bool IsInsertTo(
VecEltTy& AllElts,
llvm::InsertElementInst* FirstIEI,
llvm::InsertElementInst* LastIEI,
llvm::SmallVector<SSubVector, 4>& SVs);
CodeGenContext* m_pCtx;
WIAnalysis* m_WIA;
LiveVars* m_LV;
DeSSA* m_DeSSA;
CodeGenPatternMatch* m_PatternMatch;
CoalescingEngine* m_coalescingEngine;
const llvm::DataLayout* m_DL;
// The register pressure estimator (optional).
RegisterEstimator *m_RPE;
......@@ -222,6 +286,11 @@ private:
// When this block has low register pressure, reuse can be applied
// aggressively without checking each individual def-use pair.
Status m_IsBlockPressureLow;
// Temporaries
//SmallPtrSet<llvm::Instruction*, 16> m_Visited;
ValueAliasMapTy m_ExtractFrom;
ValueAliasMapTy m_insertTo;
};
llvm::FunctionPass *createVariableReuseAnalysisPass();
......
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
set(IGC_BUILD__SRC__DriverInterface
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/AddImplicitArgs.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/customApi.cpp"
......@@ -9,6 +8,7 @@ set(IGC_BUILD__SRC__DriverInterface
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/ProcessFuncAttributes.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/TypesLegalizationPass.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/IRUpgrader/UpgraderResourceAccess.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/AddCopyIntrinsic.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/OCL/LoadBuffer.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/OCL/Patch/patch_parser.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/OCL/Platform/cmd_media_caps_g8.cpp"
......@@ -44,7 +44,7 @@ endif(IGC_BUILD__SPIRV_ENABLED)
if(LLVM_ON_WIN32)
set(IGC_BUILD__SRC_Win__DriverInterface
set(IGC_BUILD__SRC_Win__DriverInterface
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/OCL/sp/sp_debug.cpp"
)
......@@ -55,7 +55,7 @@ set(IGC_BUILD__HDR__DriverInterface
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/SurfaceFormats.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/ShaderTypesEnum.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/IRUpgrader/IRUpgrader.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorCommon/AddCopyIntrinsic.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/OCL/KernelAnnotations.hpp"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/OCL/CommandStream/SamplerTypes.h"
"${CMAKE_CURRENT_SOURCE_DIR}/../AdaptorOCL/OCL/CommandStream/SurfaceTypes.h"
......
......@@ -273,6 +273,7 @@ Imported_Intrinsics = \
"GenISA_InitDiscardMask": ["bool",[],"None"],
"GenISA_UpdateDiscardMask": ["bool",["bool","bool"],"None"],
"GenISA_GetPixelMask": ["bool",["bool"],"None"],
"GenISA_Copy": ["anyvector",[0],"None"],
"GenISA_dp4a_ss": ["int",["int","int","int"],"NoMem"],
"GenISA_dp4a_uu": ["int",["int","int","int"],"NoMem"],
"GenISA_dp4a_su": ["int",["int","int","int"],"NoMem"],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment