Commit 63cce630 authored by Chen, Weiyu's avatar Chen, Weiyu Committed by gbsbuild

SIMD32 64b scalar scan operation fix

Change-Id: I17289a1d7abcaa4afbd6111ec02cfbf2f81d259c
parent 1335bfc1
......@@ -10126,9 +10126,10 @@ void EmitPass::emitPreOrPostFixOpScalar(
// This is to handle cases when not all lanes are enabled. In that case we fill the lanes with 0.
bool isSimd32 = (m_currShader->m_dispatchSize == SIMDMode::SIMD32);
int counter = isSimd32 ? 2 : 1;
CVariable* pSrcCopy[2] = {nullptr};
for (int i = 0; i < counter; ++i)
{
CVariable* pSrcCopy = m_currShader->GetNewVariable(
pSrcCopy[i] = m_currShader->GetNewVariable(
numLanes(m_currShader->m_SIMDSize),
type,
IGC::EALIGN_GRF,
......@@ -10142,8 +10143,9 @@ void EmitPass::emitPreOrPostFixOpScalar(
// Set the GRF to 0 with no mask. This will set all the registers to 0
CVariable* pIdentityValue = m_currShader->ImmToVariable(identityValue, type);
m_encoder->SetSecondHalf(i == 1);
m_encoder->SetNoMask();
m_encoder->Copy(pSrcCopy, pIdentityValue);
m_encoder->Copy(pSrcCopy[i], pIdentityValue);
m_encoder->Push();
// Now copy the src with a mask so the disabled lanes still keep their 0
......@@ -10152,7 +10154,7 @@ void EmitPass::emitPreOrPostFixOpScalar(
m_encoder->SetSrcModifier(0, EMOD_NEG);
}
m_encoder->SetSecondHalf(i == 1);
m_encoder->Copy(pSrcCopy, src);
m_encoder->Copy(pSrcCopy[i], src);
m_encoder->Push();
int srcIdx = 0;
......@@ -10161,15 +10163,26 @@ void EmitPass::emitPreOrPostFixOpScalar(
if (isPrefix)
{
// For case where we need the prefix shift the source by 1 lane
// (W) mov (1) result[0] identity
m_encoder->Copy(result[i], pIdentityValue);
if (i == 0)
{
// (W) mov (1) result[0] identity
m_encoder->Copy(result[i], pIdentityValue);
}
else
{
// (W) mov (1) results[16], srcCopy[15]
m_encoder->SetSrcSubReg(0, 15);
m_encoder->SetSrcRegion(0, 0, 1, 0);
m_encoder->Copy(result[i], pSrcCopy[0]);
}
}
else
{
// (W) mov (1) result[0]:ty srcCopy[0/16]:ty
m_encoder->SetSrcSubReg(0, i == 0 ? 0 : 16);
// (W) mov (1) result[0/16]:ty srcCopy[0/16]:ty
m_encoder->SetSrcSubReg(0, 0);
m_encoder->SetSrcRegion(0, 0, 1, 0);
m_encoder->Copy(result[i], pSrcCopy);
m_encoder->Copy(result[i], pSrcCopy[i]);
srcIdx = 1;
}
m_encoder->Push();
......@@ -10185,7 +10198,7 @@ void EmitPass::emitPreOrPostFixOpScalar(
m_encoder->SetSrcSubReg(1, dstIdx - 1);
m_encoder->SetDstRegion(1);
m_encoder->SetDstSubReg(dstIdx);
m_encoder->GenericAlu(op, result[i], pSrcCopy, result[i]);
m_encoder->GenericAlu(op, result[i], pSrcCopy[i], result[i]);
m_encoder->Push();
}
}
......@@ -10193,17 +10206,12 @@ void EmitPass::emitPreOrPostFixOpScalar(
if (isSimd32)
{
// For SIMD32 we need to write the last element of the prev element to the next 16 elements
for (int i = 0; i < 2; ++i)
{
m_encoder->SetSimdSize(SIMDMode::SIMD8);
m_encoder->SetNoMask();
m_encoder->SetSrcRegion(0, 0, 1, 0);
m_encoder->SetSrcSubReg(0, 15);
m_encoder->SetSrcSubReg(1, i == 0 ? 0 : 8);
m_encoder->SetDstSubReg(i == 0 ? 0 : 8);
m_encoder->GenericAlu(op, result[1], result[0], result[1]);
m_encoder->Push();
}
m_encoder->SetSimdSize(SIMDMode::SIMD16);
m_encoder->SetNoMask();
m_encoder->SetSrcRegion(0, 0, 1, 0);
m_encoder->SetSrcSubReg(0, 15);
m_encoder->GenericAlu(op, result[1], result[0], result[1]);
m_encoder->Push();
}
// reset second half state
m_encoder->SetSecondHalf(false);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment