Commit 3246f772 authored by Thomas, committed by gbsbuild

Move uniform atomic logic outside of the emit pass. This will allow for more complex optimizations, and the new intrinsic may be used in more cases.

Change-Id: I6992ba21b50e288332b90a381fc796b874397771
parent 11fd18d0
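
For context, the optimization being moved out of the emit pass works as follows: when the destination address of an add-style atomic is uniform across the SIMD lanes, the per-lane atomics can be folded into a single wave-level operation. A minimal sketch of the transformation (illustrative pseudocode; the actual pass emits the GenISA intrinsics that appear in the diffs below):

    // before: every active lane issues its own atomic to the same uniform address
    value = intatomicraw(buf, addr, src, IADD)           // executed per lane

    // after: one wave-wide scan plus a single atomic for the whole wave
    scan  = WavePrefix(src, SUM, /* inclusive */ true)   // per-lane inclusive scan
    total = WaveShuffleIndex(scan, simdSize() - 1)       // reduced value from the last channel
    base  = WaveUniformAtomic(buf, addr, total, IADD)    // issued once, address is uniform
    value = base + scan - src                            // per-lane return value rebuilt

When the atomic's result is unused, the prefix scan is replaced by a plain WaveAll reduction and no per-lane value needs to be rebuilt.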
......@@ -71,6 +71,7 @@ set(IGC_BUILD__SRC__CISACodeGen_Common
"${CMAKE_CURRENT_SOURCE_DIR}/Simd32Profitability.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TypeDemote.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/VariableReuseAnalysis.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/UniformAtomic.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TranslationTable.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/VectorPreProcess.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/VectorProcess.cpp"
......@@ -114,6 +115,7 @@ set(IGC_BUILD__HDR__CISACodeGen_Common
"${CMAKE_CURRENT_SOURCE_DIR}/FoldKnownWorkGroupSizes.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GenCodeGenModule.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GenIRLowering.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GenLLVMPasses.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GenNullPointerLowering.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GenSimplification.h"
"${CMAKE_CURRENT_SOURCE_DIR}/GeometryShaderCodeGen.hpp"
......
......@@ -1657,6 +1657,7 @@ static bool IsRawAtomicIntrinsic(llvm::Value *V) {
case GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
case GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
case GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
case GenISAIntrinsic::GenISA_WaveUniformAtomic:
return true;
}
......@@ -1695,7 +1696,7 @@ static e_alignment GetPreferredAlignmentOnUse(llvm::Value *V, WIAnalysis *WIA,
}
if (IsRawAtomicIntrinsic(GII)) {
Value *Ptr = GII->getArgOperand(1);
Value *Ptr = V;
if (WIA->whichDepend(Ptr) == WIAnalysis::UNIFORM) {
if (PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType())) {
if (IGC::isA64Ptr(PtrTy, pContext))
......@@ -2157,6 +2158,8 @@ unsigned int CShader::EvaluateSIMDConstExpr(Value* C)
{
switch(op->getOpcode())
{
case Instruction::Sub:
return EvaluateSIMDConstExpr(op->getOperand(0)) - EvaluateSIMDConstExpr(op->getOperand(1));
case Instruction::Add:
return EvaluateSIMDConstExpr(op->getOperand(0)) + EvaluateSIMDConstExpr(op->getOperand(1));
case Instruction::Mul:
......
This diff is collapsed.
......@@ -229,14 +229,7 @@ public:
void emitUAVSerialize();
void emitScalarAtomics(
llvm::Instruction* pInst,
const ResourceDescriptor& resource,
AtomicOp atomic_op,
CVariable* pDstAddr,
CVariable* pSrc,
bool isA64,
bool is16Bit);
void emitScalarAtomics(llvm::Instruction* pInst);
/// do reduction and accumulate all the active channels, return a uniform value
void emitReductionAll(
e_opcode op,
......
/*===================== begin_copyright_notice ==================================
Copyright (c) 2017 Intel Corporation
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
======================= end_copyright_notice ==================================*/
#pragma once
//////////////////////////////////////////////////////////////////////////
// Pass declarations for Gen-specific optimizations
namespace llvm
{
class FunctionPass;
}
namespace IGC
{
/// Transform raw atomic into WaveUniformAtomic when the address is uniform
llvm::FunctionPass *createUniformAtomicPass();
}
......@@ -198,15 +198,11 @@ bool CodeGenPatternMatch::SIMDConstExpr(Instruction* C)
{
switch(op->getOpcode())
{
case Instruction::Sub:
case Instruction::Add:
isConstExpr = IsConstOrSimdConstExpr(op->getOperand(0)) && IsConstOrSimdConstExpr(op->getOperand(1));
break;
case Instruction::Mul:
isConstExpr = IsConstOrSimdConstExpr(op->getOperand(0)) && IsConstOrSimdConstExpr(op->getOperand(1));
break;
case Instruction::Shl:
isConstExpr = IsConstOrSimdConstExpr(op->getOperand(0)) && IsConstOrSimdConstExpr(op->getOperand(1));
break;
default:
break;
}
......@@ -377,6 +373,7 @@ void CodeGenPatternMatch::SetPatternRoot(llvm::Instruction& inst)
{
m_root = &inst;
m_rootIsSubspanUse = IsSubspanUse(m_root);
HandleNoMaskIntrinsic(&inst);
}
template<typename Op_t, typename ConstTy>
......@@ -868,6 +865,25 @@ bool CodeGenPatternMatch::HasUseOutsideLoop(llvm::Value* v)
return false;
}
bool CodeGenPatternMatch::IsWaveReduction(llvm::Value* v)
{
if(GenIntrinsicInst* genIntrinsic = dyn_cast<GenIntrinsicInst>(v))
{
switch(genIntrinsic->getIntrinsicID())
{
case GenISAIntrinsic::GenISA_WavePrefix:
case GenISAIntrinsic::GenISA_QuadPrefix:
case GenISAIntrinsic::GenISA_WaveAll:
case GenISAIntrinsic::GenISA_WaveBallot:
return true;
default:
break;
}
}
return false;
}
void CodeGenPatternMatch::HandleSubspanUse(llvm::Value* v)
{
assert(m_root!=nullptr);
......@@ -877,7 +893,11 @@ void CodeGenPatternMatch::HandleSubspanUse(llvm::Value* v)
}
if(!isa<Constant>(v) && m_WI->whichDepend(v) != WIAnalysis::UNIFORM)
{
if(isa<PHINode>(v) || HasUseOutsideLoop(v))
if(IsWaveReduction(v))
{
// do nothing
}
else if(isa<PHINode>(v) || HasUseOutsideLoop(v))
{
// If a phi is used in a subspan we cannot propagate the subspan use and need to use VMask
m_NeedVMask = true;
......@@ -901,6 +921,27 @@ void CodeGenPatternMatch::HandleSubspanUse(llvm::Value* v)
}
}
/// Intrinsics that do not honor the SIMD mask need to be force-isolated
/// so that they don't get coalesced into a divergent phi
void CodeGenPatternMatch::HandleNoMaskIntrinsic(Value* v)
{
if(GenIntrinsicInst* genIntrinsic = dyn_cast<GenIntrinsicInst>(v))
{
switch(genIntrinsic->getIntrinsicID())
{
case GenISAIntrinsic::GenISA_WavePrefix:
case GenISAIntrinsic::GenISA_QuadPrefix:
if(HasPhiUse(*v) && m_WI->insideDivergentCF(m_root))
{
ForceIsolate(v);
}
break;
default:
break;
}
}
}
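
A hedged sketch of the hazard this isolation avoids (illustrative C++-style pseudocode, not actual IR):

    // if (divergent_cond)               // only some lanes take this path
    //     a = WavePrefix(src, SUM, 1);  // runs NoMask: writes every lane of its destination
    // else
    //     b = ...;
    // r = phi(a, b);
    //
    // If r, a and b were coalesced into one register, the NoMask WavePrefix would
    // clobber the lanes that should carry b. Forcing the intrinsic's destination
    // into its own variable keeps the divergent phi correct.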
bool CodeGenPatternMatch::MatchMinMax(llvm::SelectInst &SI) {
// Pattern to emit.
struct MinMaxPattern : public Pattern {
......
......@@ -240,9 +240,11 @@ public:
void CreateBasicBlocks(llvm::Function * pLLVMFunc);
uint GetBlockId(llvm::BasicBlock* bb);
void HandleSubspanUse(llvm::Value* v);
void HandleNoMaskIntrinsic(llvm::Value* v);
void HandleSampleDerivative(llvm::GenIntrinsicInst & I);
bool IsSubspanUse(llvm::Value* v);
bool HasUseOutsideLoop(llvm::Value* v);
bool IsWaveReduction(llvm::Value* v);
bool NeedVMask();
//helper function
......
......@@ -69,6 +69,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "Compiler/CISACodeGen/VectorProcess.hpp"
#include "Compiler/CISACodeGen/LowerGEPForPrivMem.hpp"
#include "Compiler/CISACodeGen/POSH_RemoveNonPositionOutput.h"
#include "Compiler/CISACodeGen/GenLLVMPasses.h"
#include "Compiler/CISACodeGen/SLMConstProp.hpp"
#include "Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.hpp"
......@@ -87,6 +88,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "Compiler/Optimizer/GatingSimilarSamples.hpp"
#include "Compiler/MetaDataApi/PurgeMetaDataUtils.hpp"
#include "Compiler/HandleLoadStoreInstructions.hpp"
#include "Compiler/CustomSafeOptPass.hpp"
#include "Compiler/CustomUnsafeOptPass.hpp"
......@@ -270,6 +272,10 @@ inline void AddAnalysisPasses(CodeGenContext &ctx, const CShaderProgram::KernelS
{
mpm.add(new ConstantCoalescing());
}
if(ctx.m_instrTypes.hasAtomics)
{
mpm.add(createUniformAtomicPass());
}
if( !isOptDisabled )
{
// If you want to clean up the dead-code after push optimization
......
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/WIAnalysis.hpp"
#include "Compiler/CodeGenContextWrapper.hpp"
#include "Compiler/MetaDataUtilsWrapper.h"
#include "Compiler/IGCPassSupport.h"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/Pass.h>
#include <llvm/ADT/SmallVector.h>
#include <llvm/IR/InstVisitor.h>
#include <llvm/IR/Instruction.h>
#include <llvm/Support/raw_ostream.h>
#include "common/LLVMWarningsPop.hpp"
using namespace llvm;
using namespace IGC;
using namespace IGC::IGCMD;
namespace {
class UniformAtomic : public FunctionPass, public InstVisitor<UniformAtomic>
{
public:
static char ID;
UniformAtomic() : FunctionPass(ID) {
initializeUniformAtomicPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &) override;
void visitCallInst(llvm::CallInst &C);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addPreservedID(WIAnalysis::ID);
AU.addRequired<WIAnalysis>();
AU.addRequired<CodeGenContextWrapper>();
}
private:
struct AtomicInfo
{
Value* resourcePtr;
Value* offset;
Value* src;
Value* atomicOp;
WaveOps op;
};
void GetScalarAtomicInfo(CallInst* atomic, AtomicInfo& info);
void Get(CallInst* pInst);
bool IsUniformAtomic(CallInst* pInst);
bool m_changed = false;
WIAnalysis* m_WIAnalysis = nullptr;
};
} // End anonymous namespace
char UniformAtomic::ID = 0;
#define PASS_FLAG "igc-uniform-atomic"
#define PASS_DESC "optimize atomics with a uniform address"
#define PASS_CFG_ONLY false
#define PASS_ANALYSIS false
IGC_INITIALIZE_PASS_BEGIN(UniformAtomic, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
IGC_INITIALIZE_PASS_END(UniformAtomic, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
namespace IGC
{
FunctionPass *createUniformAtomicPass() {
return new UniformAtomic();
}
}
bool UniformAtomic::runOnFunction(Function &F)
{
if(IGC_IS_FLAG_ENABLED(DisableScalarAtomics) ||
getAnalysis<CodeGenContextWrapper>().getCodeGenContext()->m_DriverInfo.WASLMPointersDwordUnit())
{
return false;
}
m_WIAnalysis = &getAnalysis<WIAnalysis>();
visit(F);
return m_changed;
}
bool UniformAtomic::IsUniformAtomic(llvm::CallInst* pInst)
{
if(llvm::GenIntrinsicInst* pIntrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(pInst))
{
GenISAIntrinsic::ID id = pIntrinsic->getIntrinsicID();
// Dst address in bytes.
if(id == GenISAIntrinsic::GenISA_intatomicraw ||
id == GenISAIntrinsic::GenISA_intatomicrawA64)
{
// TODO: add support for 64bits type
if(pInst->getType()->getScalarSizeInBits() == 64)
{
return false;
}
llvm::Value* pllDstAddr = pInst->getOperand(1);
bool uniformAddress = m_WIAnalysis->whichDepend(pllDstAddr) == WIAnalysis::UNIFORM;
if(uniformAddress)
{
AtomicOp atomic_op = static_cast<AtomicOp>(llvm::cast<llvm::ConstantInt>(pInst->getOperand(3))->getZExtValue());
bool isAddAtomic = atomic_op == EATOMIC_IADD ||
atomic_op == EATOMIC_INC ||
atomic_op == EATOMIC_SUB;
bool isMinMaxAtomic =
atomic_op == EATOMIC_UMAX ||
atomic_op == EATOMIC_UMIN ||
atomic_op == EATOMIC_IMIN ||
atomic_op == EATOMIC_IMAX;
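// Add-style atomics can be optimized even when their result is used, because each
// lane's old value can be rebuilt from a prefix sum; for min/max the per-lane old
// value is not recoverable from a single reduced atomic, so those are only handled
// when the result is unused.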
if(isAddAtomic || (isMinMaxAtomic && pInst->use_empty()))
return true;
}
}
}
return false;
}
void UniformAtomic::GetScalarAtomicInfo(CallInst* pInst, AtomicInfo& info)
{
IRBuilder<> builder(pInst);
GenISAIntrinsic::ID id = cast<GenIntrinsicInst>(pInst)->getIntrinsicID();
info.resourcePtr = pInst->getOperand(0);
info.offset = pInst->getOperand(1);
info.src = pInst->getOperand(2);
info.atomicOp = pInst->getOperand(3);
if(id != GenISAIntrinsic::GenISA_intatomicraw)
{
info.offset = UndefValue::get(builder.getInt32Ty());
}
AtomicOp atomic_op = static_cast<AtomicOp>(cast<ConstantInt>(info.atomicOp)->getZExtValue());
switch(atomic_op)
{
case EATOMIC_IADD:
case EATOMIC_SUB:
case EATOMIC_INC:
case EATOMIC_DEC:
info.op = WaveOps::SUM;
info.atomicOp = builder.getInt32(EATOMIC_IADD);
break;
case EATOMIC_UMAX:
info.op = WaveOps::UMAX;
break;
case EATOMIC_IMAX:
info.op = WaveOps::IMAX;
break;
case EATOMIC_UMIN:
info.op = WaveOps::UMIN;
break;
case EATOMIC_IMIN:
info.op = WaveOps::IMIN;
break;
default:
assert(0 && "unsupported scalar atomic type");
break;
}
if(atomic_op == EATOMIC_DEC || atomic_op == EATOMIC_INC)
{
info.src = builder.getIntN(pInst->getType()->getScalarSizeInBits(), atomic_op == EATOMIC_DEC ? -1 : 1);
}
if(atomic_op == EATOMIC_SUB)
{
info.src = builder.CreateNeg(info.src);
}
}
void UniformAtomic::visitCallInst(llvm::CallInst &C)
{
Module* module = C.getParent()->getParent()->getParent();
if(IsUniformAtomic(&C))
{
AtomicInfo info;
GetScalarAtomicInfo(&C, info);
Function* AtomicIntr = GenISAIntrinsic::getDeclaration(
module,
GenISAIntrinsic::GenISA_WaveUniformAtomic,
{ C.getType(), C.getOperand(0)->getType() });
IRBuilder<> builder(&C);
if(C.user_empty())
{
Function* intr =
GenISAIntrinsic::getDeclaration(module, GenISAIntrinsic::GenISA_WaveAll, C.getType());
Value* reducedSrc = builder.CreateCall(
intr, { info.src, builder.getInt8(static_cast<uint32_t>(info.op))});
m_WIAnalysis->incUpdateDepend(reducedSrc, WIAnalysis::UNIFORM);
Value* atomicInst = builder.CreateCall(
AtomicIntr, { info.resourcePtr, info.offset, reducedSrc, info.atomicOp });
m_WIAnalysis->incUpdateDepend(atomicInst, WIAnalysis::UNIFORM);
}
else
{
Function* intr =
GenISAIntrinsic::getDeclaration(module, GenISAIntrinsic::GenISA_WavePrefix, C.getType());
Value* scanSrc = builder.CreateCall(
intr,
{ info.src, builder.getInt8(static_cast<uint32_t>(info.op)), builder.getInt1(true) });
m_WIAnalysis->incUpdateDepend(scanSrc, WIAnalysis::RANDOM);
Function* shuffleIntr =
GenISAIntrinsic::getDeclaration(module, GenISAIntrinsic::GenISA_WaveShuffleIndex, C.getType());
Function* simdSizeIntr =
GenISAIntrinsic::getDeclaration(module, GenISAIntrinsic::GenISA_simdSize);
Value* simdSize = builder.CreateCall(simdSizeIntr);
m_WIAnalysis->incUpdateDepend(simdSize, WIAnalysis::UNIFORM);
Value* lastChannel = builder.CreateSub(simdSize, builder.getInt32(1));
m_WIAnalysis->incUpdateDepend(lastChannel, WIAnalysis::UNIFORM);
Value* sumall = builder.CreateCall(shuffleIntr, { scanSrc, lastChannel });
m_WIAnalysis->incUpdateDepend(sumall, WIAnalysis::UNIFORM);
Value* atomicInst = builder.CreateCall(
AtomicIntr, { info.resourcePtr, info.offset, sumall, info.atomicOp });
m_WIAnalysis->incUpdateDepend(atomicInst, WIAnalysis::UNIFORM);
Value* returnVal = builder.CreateAdd(atomicInst, scanSrc);
m_WIAnalysis->incUpdateDepend(returnVal, WIAnalysis::RANDOM);
returnVal = builder.CreateSub(returnVal, info.src);
m_WIAnalysis->incUpdateDepend(returnVal, WIAnalysis::RANDOM);
C.replaceAllUsesWith(returnVal);
}
C.eraseFromParent();
}
}
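
The per-lane return-value reconstruction above (old value = base + inclusive scan - src) can be sanity-checked outside the compiler. A small standalone C++ sketch, independent of IGC and assuming lane order matches the scan order:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    int main() {
        // Per-lane values an 8-wide wave wants to atomically add at one uniform address.
        std::vector<int> src = {3, 1, 4, 1, 5, 9, 2, 6};
        int memory = 100;                       // value stored at the uniform address

        // Reference: each lane issues its own atomic add and receives the old value.
        std::vector<int> expected(src.size());
        int ref = memory;
        for (size_t i = 0; i < src.size(); ++i) { expected[i] = ref; ref += src[i]; }

        // Optimized form: inclusive prefix sum, one atomic with the wave total,
        // then per-lane result = base + scan - src.
        std::vector<int> scan(src.size());
        int running = 0;
        for (size_t i = 0; i < src.size(); ++i) { running += src[i]; scan[i] = running; }
        int total = scan.back();                // WaveShuffleIndex(scan, simdSize - 1)
        int base = memory;                      // the single atomic returns the old value
        memory += total;
        for (size_t i = 0; i < src.size(); ++i)
            assert(base + scan[i] - src[i] == expected[i]);
        assert(memory == ref);
        std::printf("per-lane reconstruction matches sequential atomics\n");
        return 0;
    }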
......@@ -136,3 +136,4 @@ void initializeHalfPromotionPass(llvm::PassRegistry&);
void initializeFixFastMathFlagsPass(llvm::PassRegistry&);
void initializeCodeAssumptionPass(llvm::PassRegistry&);
void initializeIGCInstructionCombiningPassPass(llvm::PassRegistry&);
void initializeUniformAtomicPass(llvm::PassRegistry&);
......@@ -89,6 +89,7 @@ Imported_Intrinsics = \
"GenISA_ldrawvector_indexed": ["anyvector",["anyptr","int", "int"],"ReadArgMem"],
"GenISA_storeraw_indexed": ["void",["anyptr","int","any:float"],"None"],
"GenISA_storerawvector_indexed": ["void",["anyptr","int","anyvector"],"None"],
"GenISA_WaveUniformAtomic": ["anyint",["anyptr","int",0,"int"],"Convergent"],
"GenISA_intatomicraw": ["anyint",["anyptr","int",0,"int"],"ReadWriteArgMem"],
"GenISA_floatatomicraw": ["anyfloat",["anyptr","int",0,"int"],"ReadWriteArgMem"],
"GenISA_intatomicrawA64": ["anyint",["anyptr","anyptr",0,"int"],"ReadWriteArgMem"],
......