Commits on Source (113)
19.0.0-rc2
19.0.0-rc5
# Both of these were already merged with different shas
da48cba61ef6fefb799bf96e6364b70dbf4ec712
c812c740e60c14060eb89db66039111881a0f42f
\ No newline at end of file
......
@@ -13,12 +13,12 @@
 is_stable_nomination()
 {
-	git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
+	git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
 }
 is_typod_nomination()
 {
-	git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
+	git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
 }
 fixes=
......
+mesa (19.0.0~rc5-1) experimental; urgency=medium
+  * New upstream release candidate.
+  * libgbm1.symbols: Updated.
+ -- Timo Aaltonen <tjaalton@debian.org> Wed, 20 Feb 2019 14:02:45 +0200
 mesa (19.0.0~rc2-1) experimental; urgency=medium
   * New upstream snapshot.
......
@@ -5,6 +12,12 @@ mesa (19.0.0~rc2-1) experimental; urgency=medium
  -- Timo Aaltonen <tjaalton@debian.org> Fri, 08 Feb 2019 10:44:48 +0200
+mesa (18.3.4-1) unstable; urgency=medium
+  * New upstream release.
+ -- Timo Aaltonen <tjaalton@debian.org> Tue, 19 Feb 2019 11:02:44 +0200
 mesa (18.3.3-1) unstable; urgency=medium
   [ James Clarke ]
......
......
@@ -28,6 +28,7 @@ libgbm.so.1 libgbm1 #MINVER#
 gbm_device_get_fd@Base 7.11~1
 gbm_device_get_format_modifier_plane_count@Base 17.3.0~rc1
 gbm_device_is_format_supported@Base 8.1~0
+gbm_format_get_name@Base 19.0.0~rc5
 gbm_surface_create@Base 8.1~0
 gbm_surface_create_with_modifiers@Base 17.1.0~rc2
 gbm_surface_destroy@Base 8.1~0
......
......
@@ -1400,7 +1400,7 @@ if with_platform_x11
     dep_xcb_xfixes = dependency('xcb-xfixes')
   endif
   if with_xlib_lease
-    dep_xcb_xrandr = dependency('xcb-randr', version : '>= 1.12')
+    dep_xcb_xrandr = dependency('xcb-randr')
     dep_xlib_xrandr = dependency('xrandr', version : '>= 1.3')
   endif
 endif
......
......
@@ -923,6 +923,14 @@ ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
 			ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
 }
+LLVMValueRef
+ac_build_gep_ptr(struct ac_llvm_context *ctx,
+		 LLVMValueRef base_ptr,
+		 LLVMValueRef index)
+{
+	return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
+}
 LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
 	      LLVMValueRef base_ptr,
......
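The new helper sits next to ac_build_gep0, whose body is truncated in this hunk. For contrast, a hedged sketch of what ac_build_gep0 is believed to do in upstream Mesa (the _sketch suffix is mine): it emits a two-index GEP with a leading zero, indexing into the pointed-to aggregate, while the new ac_build_gep_ptr emits a one-index GEP that steps the pointer itself, which is what a NIR ptr_as_array deref needs.

/* Hedged sketch of ac_build_gep0 (body truncated above); the _sketch
 * name is mine, not from the tree. */
LLVMValueRef
ac_build_gep0_sketch(struct ac_llvm_context *ctx,
                     LLVMValueRef base_ptr,
                     LLVMValueRef index)
{
	LLVMValueRef indices[2] = {
		LLVMConstInt(ctx->i32, 0, false), /* step into the pointee */
		index,                            /* then pick the element */
	};
	return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
}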
......
@@ -223,6 +223,11 @@ ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
 		       LLVMValueRef attr_number,
 		       LLVMValueRef params);
+LLVMValueRef
+ac_build_gep_ptr(struct ac_llvm_context *ctx,
+		 LLVMValueRef base_ptr,
+		 LLVMValueRef index);
 LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
 	      LLVMValueRef base_ptr,
......
......
@@ -2006,18 +2006,23 @@ static void
 visit_store_var(struct ac_nir_context *ctx,
 		nir_intrinsic_instr *instr)
 {
-	nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+	nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+	nir_variable *var = nir_deref_instr_get_variable(deref);
 	LLVMValueRef temp_ptr, value;
-	int idx = var->data.driver_location;
-	unsigned comp = var->data.location_frac;
+	int idx = 0;
+	unsigned comp = 0;
 	LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
 	int writemask = instr->const_index[0];
 	LLVMValueRef indir_index;
 	unsigned const_index;
-	get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), false,
-			 NULL, NULL, &const_index, &indir_index);
+	if (var) {
+		get_deref_offset(ctx, deref, false,
+				 NULL, NULL, &const_index, &indir_index);
+		idx = var->data.driver_location;
+		comp = var->data.location_frac;
+	}
 	if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
......
@@ -2030,7 +2035,7 @@ visit_store_var(struct ac_nir_context *ctx,
 	writemask = writemask << comp;
-	switch (var->data.mode) {
+	switch (deref->mode) {
 	case nir_var_shader_out:
 		if (ctx->stage == MESA_SHADER_TESS_CTRL) {
......
@@ -2039,8 +2044,8 @@ visit_store_var(struct ac_nir_context *ctx,
 			unsigned const_index = 0;
 			const bool is_patch = var->data.patch;
-			get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
-					 false, NULL, is_patch ? NULL : &vertex_index,
+			get_deref_offset(ctx, deref, false, NULL,
+					 is_patch ? NULL : &vertex_index,
 					 &const_index, &indir_index);
 			ctx->abi->store_tcs_outputs(ctx->abi, var,
......
@@ -2107,7 +2112,7 @@ visit_store_var(struct ac_nir_context *ctx,
 		int writemask = instr->const_index[0];
 		LLVMValueRef address = get_src(ctx, instr->src[0]);
 		LLVMValueRef val = get_src(ctx, instr->src[1]);
-		if (util_is_power_of_two_nonzero(writemask)) {
+		if (writemask == (1u << ac_get_llvm_num_components(val)) - 1) {
 			val = LLVMBuildBitCast(
 				ctx->ac.builder, val,
 				LLVMGetElementType(LLVMTypeOf(address)), "");
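A worked check of the new condition, with illustrative values rather than code from the patch: the old power-of-two test proved only that a single component was written, not that the whole vector was, so a partial write could wrongly take the whole-vector bitcast-and-store path.

#include <assert.h>

int main(void)
{
	unsigned num_components = 3;                  /* a vec3 store */
	unsigned full_mask = (1u << num_components) - 1;
	assert(full_mask == 0x7);                     /* writes x, y and z */
	assert((0x4 & (0x4 - 1)) == 0);               /* 0x4 is a power of two... */
	assert(0x4 != full_mask);                     /* ...but not a full write */
	return 0;
}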
......
@@ -3818,6 +3823,73 @@ static void visit_jump(struct ac_llvm_context *ctx,
 	}
 }
+static LLVMTypeRef
+glsl_base_to_llvm_type(struct ac_llvm_context *ac,
+                       enum glsl_base_type type)
+{
+	switch (type) {
+	case GLSL_TYPE_INT:
+	case GLSL_TYPE_UINT:
+	case GLSL_TYPE_BOOL:
+	case GLSL_TYPE_SUBROUTINE:
+		return ac->i32;
+	case GLSL_TYPE_INT16:
+	case GLSL_TYPE_UINT16:
+		return ac->i16;
+	case GLSL_TYPE_FLOAT:
+		return ac->f32;
+	case GLSL_TYPE_FLOAT16:
+		return ac->f16;
+	case GLSL_TYPE_INT64:
+	case GLSL_TYPE_UINT64:
+		return ac->i64;
+	case GLSL_TYPE_DOUBLE:
+		return ac->f64;
+	default:
+		unreachable("unknown GLSL type");
+	}
+}
+static LLVMTypeRef
+glsl_to_llvm_type(struct ac_llvm_context *ac,
+                  const struct glsl_type *type)
+{
+	if (glsl_type_is_scalar(type)) {
+		return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
+	}
+	if (glsl_type_is_vector(type)) {
+		return LLVMVectorType(
+		   glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
+		   glsl_get_vector_elements(type));
+	}
+	if (glsl_type_is_matrix(type)) {
+		return LLVMArrayType(
+		   glsl_to_llvm_type(ac, glsl_get_column_type(type)),
+		   glsl_get_matrix_columns(type));
+	}
+	if (glsl_type_is_array(type)) {
+		return LLVMArrayType(
+		   glsl_to_llvm_type(ac, glsl_get_array_element(type)),
+		   glsl_get_length(type));
+	}
+	assert(glsl_type_is_struct(type));
+	LLVMTypeRef member_types[glsl_get_length(type)];
+	for (unsigned i = 0; i < glsl_get_length(type); i++) {
+		member_types[i] =
+		   glsl_to_llvm_type(ac,
+		                     glsl_get_struct_field(type, i));
+	}
+	return LLVMStructTypeInContext(ac->context, member_types,
+	                               glsl_get_length(type), false);
+}
 static void visit_deref(struct ac_nir_context *ctx,
 		        nir_deref_instr *instr)
 {
......
@@ -3839,9 +3911,27 @@ static void visit_deref(struct ac_nir_context *ctx,
 		result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
 				       get_src(ctx, instr->arr.index));
 		break;
-	case nir_deref_type_cast:
+	case nir_deref_type_ptr_as_array:
+		result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
+					  get_src(ctx, instr->arr.index));
+		break;
+	case nir_deref_type_cast: {
 		result = get_src(ctx, instr->parent);
+		LLVMTypeRef pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
+		LLVMTypeRef type = LLVMPointerType(pointee_type, AC_ADDR_SPACE_LDS);
+		if (LLVMTypeOf(result) != type) {
+			if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
+				result = LLVMBuildBitCast(ctx->ac.builder, result,
+							  type, "");
+			} else {
+				result = LLVMBuildIntToPtr(ctx->ac.builder, result,
+							   type, "");
+			}
+		}
 		break;
+	}
 	default:
 		unreachable("Unhandled deref_instr deref type");
 	}
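To make the new cast handling concrete, a hedged sketch of the mapping glsl_to_llvm_type performs and the LDS pointer type the cast case builds; `ac` is assumed to be an initialized struct ac_llvm_context pointer, and the variable names are illustrative:

/* Assuming an initialized struct ac_llvm_context *ac: */
LLVMTypeRef f      = ac->f32;                          /* float */
LLVMTypeRef vec4   = LLVMVectorType(ac->f32, 4);       /* vec4 -> <4 x float> */
LLVMTypeRef mat3   = LLVMArrayType(
	LLVMVectorType(ac->f32, 3), 3);                /* mat3 -> 3 vec3 columns */
LLVMTypeRef farray = LLVMArrayType(ac->f32, 8);        /* float[8] */
/* A nir_deref_type_cast then wraps the pointee type in an LDS pointer: */
LLVMTypeRef lds_ptr = LLVMPointerType(vec4, AC_ADDR_SPACE_LDS);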
......
@@ -3990,73 +4080,6 @@ ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
 	}
 }
-static LLVMTypeRef
-glsl_base_to_llvm_type(struct ac_llvm_context *ac,
-                       enum glsl_base_type type)
-{
-	switch (type) {
-	case GLSL_TYPE_INT:
-	case GLSL_TYPE_UINT:
-	case GLSL_TYPE_BOOL:
-	case GLSL_TYPE_SUBROUTINE:
-		return ac->i32;
-	case GLSL_TYPE_INT16:
-	case GLSL_TYPE_UINT16:
-		return ac->i16;
-	case GLSL_TYPE_FLOAT:
-		return ac->f32;
-	case GLSL_TYPE_FLOAT16:
-		return ac->f16;
-	case GLSL_TYPE_INT64:
-	case GLSL_TYPE_UINT64:
-		return ac->i64;
-	case GLSL_TYPE_DOUBLE:
-		return ac->f64;
-	default:
-		unreachable("unknown GLSL type");
-	}
-}
-static LLVMTypeRef
-glsl_to_llvm_type(struct ac_llvm_context *ac,
-                  const struct glsl_type *type)
-{
-	if (glsl_type_is_scalar(type)) {
-		return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
-	}
-	if (glsl_type_is_vector(type)) {
-		return LLVMVectorType(
-		   glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
-		   glsl_get_vector_elements(type));
-	}
-	if (glsl_type_is_matrix(type)) {
-		return LLVMArrayType(
-		   glsl_to_llvm_type(ac, glsl_get_column_type(type)),
-		   glsl_get_matrix_columns(type));
-	}
-	if (glsl_type_is_array(type)) {
-		return LLVMArrayType(
-		   glsl_to_llvm_type(ac, glsl_get_array_element(type)),
-		   glsl_get_length(type));
-	}
-	assert(glsl_type_is_struct(type));
-	LLVMTypeRef member_types[glsl_get_length(type)];
-	for (unsigned i = 0; i < glsl_get_length(type); i++) {
-		member_types[i] =
-		   glsl_to_llvm_type(ac,
-		                     glsl_get_struct_field(type, i));
-	}
-	return LLVMStructTypeInContext(ac->context, member_types,
-	                               glsl_get_length(type), false);
-}
 static void
 setup_locals(struct ac_nir_context *ctx,
 	     struct nir_function *func)
......
......
@@ -1356,7 +1356,7 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
-	if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
+	if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) {
 		radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0));
 		radeon_emit(cs, va);
 		radeon_emit(cs, va >> 32);
......
@@ -1518,14 +1518,13 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
-	if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
+	if (cmd_buffer->device->physical_device->has_load_ctx_reg_pkt) {
 		radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating));
 		radeon_emit(cs, va);
 		radeon_emit(cs, va >> 32);
 		radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
 		radeon_emit(cs, 2);
 	} else {
+		/* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */
 		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
 		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 			    COPY_DATA_DST_SEL(COPY_DATA_REG) |
......
@@ -84,7 +84,9 @@ VkResult radv_CreateDescriptorSetLayout(
 	uint32_t immutable_sampler_count = 0;
 	for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
 		max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
-		if (pCreateInfo->pBindings[j].pImmutableSamplers)
+		if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+		     pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
+		     pCreateInfo->pBindings[j].pImmutableSamplers)
 			immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
 	}
......
@@ -182,7 +184,9 @@ VkResult radv_CreateDescriptorSetLayout(
 		set_layout->has_variable_descriptors = true;
 	}
-	if (binding->pImmutableSamplers) {
+	if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+	     binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
+	     binding->pImmutableSamplers) {
 		set_layout->binding[b].immutable_samplers_offset = samplers_offset;
 		set_layout->binding[b].immutable_samplers_equal =
 			has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
......
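Both hunks apply the same rule, sketched below as a predicate: per the Vulkan spec, pImmutableSamplers is consumed only for SAMPLER and COMBINED_IMAGE_SAMPLER bindings, so it may legally be a stale non-NULL pointer on any other descriptor type and must not inflate the sampler count. The helper name is mine, not from the patch.

/* Hedged sketch; the helper name is illustrative. */
static bool
radv_binding_uses_immutable_samplers_sketch(const VkDescriptorSetLayoutBinding *binding)
{
	return (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
	        binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
	       binding->pImmutableSamplers != NULL;
}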
......
@@ -369,6 +369,11 @@ radv_physical_device_init(struct radv_physical_device *device,
 	device->dcc_msaa_allowed =
 		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
+	/* TODO: Figure out how to use LOAD_CONTEXT_REG on SI/CIK. */
+	device->has_load_ctx_reg_pkt = device->rad_info.chip_class >= GFX9 ||
+				       (device->rad_info.chip_class >= VI &&
+				        device->rad_info.me_fw_feature >= 41);
 	radv_physical_device_init_mem_types(device);
 	radv_fill_device_extension_table(device, &device->supported_extensions);
......
......
@@ -849,30 +849,26 @@ build_pipeline(struct radv_device *device,
 		.subpass = 0,
 	};
-	switch(aspect) {
-	case VK_IMAGE_ASPECT_COLOR_BIT:
-		vk_pipeline_info.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+	VkPipelineColorBlendStateCreateInfo color_blend_info = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
 		.attachmentCount = 1,
 		.pAttachments = (VkPipelineColorBlendAttachmentState []) {
-			{ .colorWriteMask =
-				VK_COLOR_COMPONENT_A_BIT |
+			{
+				.colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
 					VK_COLOR_COMPONENT_R_BIT |
 					VK_COLOR_COMPONENT_G_BIT |
 					VK_COLOR_COMPONENT_B_BIT },
 			}
 	};
-		break;
-	case VK_IMAGE_ASPECT_DEPTH_BIT:
-		vk_pipeline_info.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
+	VkPipelineDepthStencilStateCreateInfo depth_info = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
 		.depthTestEnable = true,
 		.depthWriteEnable = true,
 		.depthCompareOp = VK_COMPARE_OP_ALWAYS,
 	};
-		break;
-	case VK_IMAGE_ASPECT_STENCIL_BIT:
-		vk_pipeline_info.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
+	VkPipelineDepthStencilStateCreateInfo stencil_info = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
 		.depthTestEnable = false,
 		.depthWriteEnable = false,
......
@@ -897,6 +893,16 @@ build_pipeline(struct radv_device *device,
 		},
 		.depthCompareOp = VK_COMPARE_OP_ALWAYS,
 	};
+	switch(aspect) {
+	case VK_IMAGE_ASPECT_COLOR_BIT:
+		vk_pipeline_info.pColorBlendState = &color_blend_info;
+		break;
+	case VK_IMAGE_ASPECT_DEPTH_BIT:
+		vk_pipeline_info.pDepthStencilState = &depth_info;
+		break;
+	case VK_IMAGE_ASPECT_STENCIL_BIT:
+		vk_pipeline_info.pDepthStencilState = &stencil_info;
+		break;
 	default:
 		unreachable("Unhandled aspect");
......
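The reason the three create-info structs are hoisted out of the switch: a C compound literal lives only until the end of its enclosing block, so the old code left vk_pipeline_info holding pointers to dead objects by the time the pipeline was actually created. A minimal self-contained illustration of that pitfall, with illustrative names (not code from the tree):

#include <stdio.h>

static void use(const int *p) { printf("%d\n", *p); }

int main(void)
{
	const int *p = NULL;
	switch (1) {
	case 1:
		/* The compound literal's lifetime ends with the switch
		 * statement's block... */
		p = &(int){ 42 };
		break;
	}
	/* ...so this read is undefined behavior -- the same shape as
	 * vk_pipeline_info keeping pointers into finished switch cases. */
	use(p);
	return 0;
}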
......
@@ -2365,7 +2365,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
 		if (is_16bit) {
 			for (unsigned chan = 0; chan < 4; chan++)
 				values[chan] = LLVMBuildZExt(ctx->ac.builder,
-							     values[chan],
+							     ac_to_integer(&ctx->ac, values[chan]),
 							     ctx->ac.i32, "");
 		}
 		break;
......
@@ -2376,7 +2376,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
 		if (is_16bit) {
 			for (unsigned chan = 0; chan < 4; chan++)
 				values[chan] = LLVMBuildSExt(ctx->ac.builder,
-							     values[chan],
+							     ac_to_integer(&ctx->ac, values[chan]),
 							     ctx->ac.i32, "");
 		}
 		break;
......
@@ -2429,13 +2429,9 @@ si_llvm_init_export_args(struct radv_shader_context *ctx,
 	} else
 		memcpy(&args->out[0], values, sizeof(values[0]) * 4);
-	for (unsigned i = 0; i < 4; ++i) {
-		if (!(args->enabled_channels & (1 << i)))
-			continue;
+	for (unsigned i = 0; i < 4; ++i)
 		args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
-	}
 }
 static void
 radv_export_param(struct radv_shader_context *ctx, unsigned index,
......
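Why ac_to_integer is needed in the two hunks above: LLVM's zext/sext instructions only accept integer operands, so a 16-bit float channel must first be reinterpreted as i16 before widening to i32. A hedged sketch of the equivalent IR calls, assuming value_f16 holds an f16 value and that ac_to_integer bitcasts to the same-width integer type:

/* Reinterpret the f16 bits as i16, then widen to i32 for the export. */
LLVMValueRef as_i16  = LLVMBuildBitCast(ctx->ac.builder, value_f16,
                                        ctx->ac.i16, "");
LLVMValueRef widened = LLVMBuildZExt(ctx->ac.builder, as_i16,
                                     ctx->ac.i32, "");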
......
@@ -524,6 +524,14 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 		col_format |= cf << (4 * i);
 	}
+	if (!col_format && blend->need_src_alpha & (1 << 0)) {
+		/* When a subpass doesn't have any color attachments, write the
+		 * alpha channel of MRT0 when alpha coverage is enabled because
+		 * the depth attachment needs it.
+		 */
+		col_format |= V_028714_SPI_SHADER_32_ABGR;
+	}
 	/* If the i-th target format is set, all previous target formats must
 	 * be non-zero to avoid hangs.
 	 */
......
@@ -689,6 +697,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 	if (vkms && vkms->alphaToCoverageEnable) {
 		blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+		blend.need_src_alpha |= 0x1;
 	}
 	blend.cb_target_mask = 0;
......
@@ -3192,11 +3201,11 @@ radv_compute_db_shader_control(const struct radv_device *device,
 	bool disable_rbplus = device->physical_device->has_rbplus &&
 			      !device->physical_device->rbplus_allowed;
-	/* Do not enable the gl_SampleMask fragment shader output if MSAA is
-	 * disabled.
+	/* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
+	 * but this appears to break Project Cars (DXVK). See
+	 * https://bugs.freedesktop.org/show_bug.cgi?id=109401
 	 */
-	bool mask_export_enable = ms->num_samples > 1 &&
-				  ps->info.info.ps.writes_sample_mask;
+	bool mask_export_enable = ps->info.info.ps.writes_sample_mask;
 	return S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
 	       S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
......
......
@@ -306,6 +306,9 @@ struct radv_physical_device {
 	/* Whether DCC should be enabled for MSAA textures. */
 	bool dcc_msaa_allowed;
+	/* Whether LOAD_CONTEXT_REG packets are supported. */
+	bool has_load_ctx_reg_pkt;
 	/* This is the drivers on-disk cache used as a fallback as opposed to
 	 * the pipeline cache defined by apps.
 	 */
......
......
@@ -159,7 +159,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
 		NIR_PASS(progress, shader, nir_opt_if);
 		NIR_PASS(progress, shader, nir_opt_dead_cf);
 		NIR_PASS(progress, shader, nir_opt_cse);
-		NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
+		NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);
 		NIR_PASS(progress, shader, nir_opt_algebraic);
 		NIR_PASS(progress, shader, nir_opt_constant_folding);
 		NIR_PASS(progress, shader, nir_opt_undef);
......
......
@@ -101,7 +101,7 @@ gather_intrinsic_load_deref_info(const nir_shader *nir,
 	case MESA_SHADER_VERTEX: {
 		nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
-		if (var->data.mode == nir_var_shader_in) {
+		if (var && var->data.mode == nir_var_shader_in) {
 			unsigned idx = var->data.location;
 			uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
......
@@ -150,7 +150,7 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
 {
 	nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
-	if (var->data.mode == nir_var_shader_out) {
+	if (var && var->data.mode == nir_var_shader_out) {
 		unsigned idx = var->data.location;
 		switch (nir->info.stage) {
......
......
@@ -665,6 +665,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 			assert(num < ws->num_buffers);
 			handles[num].bo_handle = bo->bo_handle;
 			handles[num].bo_priority = bo->priority;
+			num++;
 		}
 		r = amdgpu_bo_list_create_raw(ws->dev, ws->num_buffers,
......
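The one-line fix above closes a classic filtered-copy bug, sketched generically below; keep, in, out, and count are placeholders, not code from the winsys:

/* Without num++, every accepted entry lands in slot 0 and the tail of
 * the dense output array is never written. */
unsigned num = 0;
for (unsigned i = 0; i < count; i++) {
	if (!keep(&in[i]))
		continue;
	out[num].bo_handle   = in[i].bo_handle;
	out[num].bo_priority = in[i].priority;
	num++;   /* advance the output cursor -- the line the patch adds */
}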
......
@@ -121,7 +121,7 @@ vir_emit_thrsw(struct v3d_compile *c)
 	 */
 	c->last_thrsw = vir_NOP(c);
 	c->last_thrsw->qpu.sig.thrsw = true;
-	c->last_thrsw_at_top_level = (c->execute.file == QFILE_NULL);
+	c->last_thrsw_at_top_level = !c->in_control_flow;
 }
 static uint32_t
......
@@ -1158,7 +1158,9 @@ emit_frag_end(struct v3d_compile *c)
 		inst->src[vir_get_implicit_uniform_src(inst)] =
 			vir_uniform_ui(c, tlb_specifier | 0xffffff00);
 		c->writes_z = true;
 	} else if (c->s->info.fs.uses_discard ||
+		   !c->s->info.fs.early_fragment_tests ||
+		   c->fs_key->sample_alpha_to_coverage ||
 		   !has_any_tlb_color_write) {
 		/* Emit passthrough Z if it needed to be delayed until shader
......
@@ -1188,6 +1190,7 @@ emit_frag_end(struct v3d_compile *c)
 		inst->src[vir_get_implicit_uniform_src(inst)] =
 			vir_uniform_ui(c, tlb_specifier | 0xffffff00);
+		c->writes_z = true;
 	}
 	/* XXX: Performance improvement: Merge Z write and color writes TLB
......
@@ -1455,7 +1458,7 @@ v3d_optimize_nir(struct nir_shader *s)
 		NIR_PASS(progress, s, nir_opt_dce);
 		NIR_PASS(progress, s, nir_opt_dead_cf);
 		NIR_PASS(progress, s, nir_opt_cse);
-		NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
+		NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
 		NIR_PASS(progress, s, nir_opt_algebraic);
 		NIR_PASS(progress, s, nir_opt_constant_folding);
 		NIR_PASS(progress, s, nir_opt_undef);
......
@@ -2103,10 +2106,10 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
 	else
 		else_block = vir_new_block(c);
-	bool was_top_level = false;
+	bool was_uniform_control_flow = false;
 	if (c->execute.file == QFILE_NULL) {
 		c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
-		was_top_level = true;
+		was_uniform_control_flow = true;
 	}
 	/* Set up the flags for the IF condition (taking the THEN branch). */
......
@@ -2122,7 +2125,7 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
 	/* Update the flags+cond to mean "Taking the ELSE branch (!cond) and
 	 * was previously active (execute Z) for updating the exec flags.
 	 */
-	if (was_top_level) {
+	if (was_uniform_control_flow) {
 		cond = v3d_qpu_cond_invert(cond);
 	} else {
 		struct qinst *inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0),
......
@@ -2176,7 +2179,7 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
 	vir_link_blocks(c->cur_block, after_block);
 	vir_set_emit_block(c, after_block);
-	if (was_top_level)
+	if (was_uniform_control_flow)
 		c->execute = c->undef;
 	else
 		ntq_activate_execute_for_block(c);
......
@@ -2185,12 +2188,15 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
 static void
 ntq_emit_if(struct v3d_compile *c, nir_if *nif)
 {
+	bool was_in_control_flow = c->in_control_flow;
+	c->in_control_flow = true;
 	if (c->execute.file == QFILE_NULL &&
 	    nir_src_is_dynamically_uniform(nif->condition)) {
 		ntq_emit_uniform_if(c, nif);
 	} else {
 		ntq_emit_nonuniform_if(c, nif);
 	}
+	c->in_control_flow = was_in_control_flow;
 }
 static void
......
@@ -2267,10 +2273,13 @@ static void ntq_emit_cf_list(struct v3d_compile *c, struct exec_list *list);
 static void
 ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
 {
-	bool was_top_level = false;
+	bool was_in_control_flow = c->in_control_flow;
+	c->in_control_flow = true;
+	bool was_uniform_control_flow = false;
 	if (c->execute.file == QFILE_NULL) {
 		c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
-		was_top_level = true;
+		was_uniform_control_flow = true;
 	}
 	struct qblock *save_loop_cont_block = c->loop_cont_block;
......
@@ -2307,7 +2316,7 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
 	vir_link_blocks(c->cur_block, c->loop_break_block);
 	vir_set_emit_block(c, c->loop_break_block);
-	if (was_top_level)
+	if (was_uniform_control_flow)
 		c->execute = c->undef;
 	else
 		ntq_activate_execute_for_block(c);
......
@@ -2316,6 +2325,8 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
 	c->loop_cont_block = save_loop_cont_block;
 	c->loops++;
+	c->in_control_flow = was_in_control_flow;
 }
 static void
......
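The common thread of the v3d hunks above, distilled into the save/set/restore pattern they introduce (field names are from the diff): c->execute.file == QFILE_NULL no longer implies "at top level", because a dynamically uniform if emitted inside non-uniform control flow never allocates an execute mask, so the old test misclassified it. An explicit flag cannot be fooled that way.

/* Every structured-control-flow emitter now brackets its body: */
bool was_in_control_flow = c->in_control_flow;
c->in_control_flow = true;
/* ... emit the if/else or loop body ... */
c->in_control_flow = was_in_control_flow;  /* restore on the way out */

/* vir_emit_thrsw() then tests the flag instead of the execute mask: */
c->last_thrsw_at_top_level = !c->in_control_flow;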