......@@ -3,3 +3,22 @@ ab0809e5529725bd0af6f7b6ce06415020b9d32e meson: fix strtof locale support check
# fixes: The commit addresses Meson which is explicitly disabled for 17.3
44fbbd6fd07e5784b05e08e762e54b6c71f95ab1 util: add mesa-sha1 test to meson
# stable: The commit is causing a regression
# (https://bugs.freedesktop.org/show_bug.cgi?id=103626)
18fde36ced4279f2577097a1a7d31b55f2f5f141 intel/fs: Use the original destination region for int MUL lowering
# stable: The commit addresses earlier commit 6132992cdb which did not land in
# branch
3d2b157e23c9d66df97d59be6efd1098878cc110 i965/fs: Use UW types when using V immediates
# extra: The commit just references a fix for an additional change in its v2.
c1ff99fd70cd2ceb2cac4723e4fd5efc93834746 main: Clear shader program data whenever ProgramBinary is called
# fixes: The commit addresses earlier commits 40a01c9a0ef and 8d745abc009 which
# did not land in branch
9b0223046668593deb9c0be0b557994bb5218788 egl: pass the dri2_dpy to the $plat_teardown functions
# fixes: The commit addresses earlier commit d50937f137 which did not land in
# branch
78a8b73e7d45f55ced98a148b26247d91f4e0171 vulkan/wsi: free cmd pools
mesa (17.3.3-1) unstable; urgency=medium
* New upstream release.
* Bump debhelper compat to 10.
* Bump standards version to 4.1.3.
-- Andreas Boll <andreas.boll.dev@gmail.com> Fri, 19 Jan 2018 20:34:29 +0100
mesa (17.3.2-1) unstable; urgency=medium
* New upstream release.
......
......@@ -3,11 +3,10 @@ Section: graphics
Priority: optional
Maintainer: Debian X Strike Force <debian-x@lists.debian.org>
Uploaders: Andreas Boll <andreas.boll.dev@gmail.com>
Standards-Version: 4.1.2
Standards-Version: 4.1.3
Build-Depends:
debhelper (>= 9),
dh-autoreconf,
quilt (>= 0.40),
debhelper (>= 10),
quilt (>= 0.63-8.2~),
pkg-config,
libdrm-dev (>= 2.4.85) [!hurd-any],
libx11-dev,
......
......@@ -227,8 +227,7 @@ override_dh_makeshlibs:
dh_makeshlibs -a -- -c4
%:
dh $@ --with quilt,autoreconf,libva \
--parallel \
dh $@ --with quilt,libva \
--builddirectory=build/
# For maintainer use only, generate a tarball:
......
......@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
<h2>SHA256 checksums</h2>
<pre>
TBD
f997e80f14c385f9a2ba827c2b74aebf1b7426712ca4a81c631ef9f78e437bf4 mesa-17.3.2.tar.gz
e2844a13f2d6f8f24bee65804a51c42d8dc6ae9c36cff7ee61d0940e796d64c6 mesa-17.3.2.tar.xz
</pre>
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.3.3 Release Notes / January 18, 2018</h1>
<p>
Mesa 17.3.3 is a bug fix release which fixes bugs found since the 17.3.2 release.
</p>
<p>
Mesa 17.3.3 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104214">Bug 104214</a> - Dota crashes when switching from game to desktop</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104492">Bug 104492</a> - Compute Shader: Wrong alignment when assigning struct value to structured SSBO</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104551">Bug 104551</a> - Check if Mako templates for Python are installed</li>
</ul>
<h2>Changes</h2>
<p>Alex Smith (3):</p>
<ul>
<li>anv: Add missing unlock in anv_scratch_pool_alloc</li>
<li>anv: Take write mask into account in has_color_buffer_write_enabled</li>
<li>anv: Make sure state on primary is correct after CmdExecuteCommands</li>
</ul>
<p>Andres Gomez (1):</p>
<ul>
<li>anv: Import mako templates only during execution of anv_extensions</li>
</ul>
<p>Bas Nieuwenhuizen (11):</p>
<ul>
<li>radv: Invert condition for all samples identical during resolve.</li>
<li>radv: Flush caches before subpass resolve.</li>
<li>radv: Fix fragment resolve destination offset.</li>
<li>radv: Use correct framebuffer size for partial FS resolves.</li>
<li>radv: Always use fragment resolve if dest uses DCC.</li>
<li>Revert "radv/gfx9: fix block compression texture views."</li>
<li>radv: Use correct HTILE expanded words.</li>
<li>radv: Allow writing 0 scissors.</li>
<li>ac/nir: Handle loading data from compact arrays.</li>
<li>radv: Invalidate L1 for VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT.</li>
<li>ac/nir: Sanitize location_frac for local variables.</li>
</ul>
<p>Dave Airlie (8):</p>
<ul>
<li>radv: fix events on compute queues.</li>
<li>radv: fix pipeline statistics end query on compute queue</li>
<li>radv/gfx9: fix 3d image to image transfers on compute queues.</li>
<li>radv/gfx9: fix 3d image clears on compute queues</li>
<li>radv/gfx9: fix buffer to image for 3d images on compute queues</li>
<li>radv/gfx9: fix block compression texture views.</li>
<li>radv/gfx9: use a bigger hammer to flush cb/db caches.</li>
<li>radv/gfx9: use correct swizzle parameter to work out border swizzle.</li>
</ul>
<p>Emil Velikov (1):</p>
<ul>
<li>docs: add sha256 checksums for 17.3.2</li>
</ul>
<p>Florian Will (1):</p>
<ul>
<li>glsl: Respect std430 layout in lower_buffer_access</li>
</ul>
<p>Juan A. Suarez Romero (6):</p>
<ul>
<li>cherry-ignore: intel/fs: Use the original destination region for int MUL lowering</li>
<li>cherry-ignore: i965/fs: Use UW types when using V immediates</li>
<li>cherry-ignore: main: Clear shader program data whenever ProgramBinary is called</li>
<li>cherry-ignore: egl: pass the dri2_dpy to the $plat_teardown functions</li>
<li>cherry-ignore: vulkan/wsi: free cmd pools</li>
<li>Update version to 17.3.3</li>
</ul>
<p>Józef Kucia (1):</p>
<ul>
<li>radeonsi: fix alpha-to-coverage if color writes are disabled</li>
</ul>
<p>Kenneth Graunke (2):</p>
<ul>
<li>i965: Require space for MI_BATCHBUFFER_END.</li>
<li>i965: Torch public intel_batchbuffer_emit_dword/float helpers.</li>
</ul>
<p>Lucas Stach (1):</p>
<ul>
<li>etnaviv: disable in-place resolve for non-supertiled surfaces</li>
</ul>
<p>Samuel Iglesias Gonsálvez (1):</p>
<ul>
<li>anv: VkDescriptorSetLayoutBinding can have descriptorCount == 0</li>
</ul>
<p>Thomas Hellstrom (1):</p>
<ul>
<li>loader/dri3: Avoid freeing renderbuffers in use</li>
</ul>
<p>Tim Rowley (1):</p>
<ul>
<li>swr/rast: fix invalid sign masks in avx512 simdlib code</li>
</ul>
</div>
</body>
</html>
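The release notes above point out that the version reported by glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / glGetIntegerv(GL_MINOR_VERSION) depends on the driver, and that OpenGL 4.5 must be requested at context creation. A minimal sketch of the runtime query, assuming a current core-profile context has already been created (the context-creation step itself is not shown):

#include <stdio.h>
#include <GL/gl.h>

/* Minimal sketch: print the version the driver actually exposes.
 * Assumes a current OpenGL context already exists (created with a
 * core profile requested, since compatibility contexts are not
 * supported). */
static void print_gl_version(void)
{
        GLint major = 0, minor = 0;

        glGetIntegerv(GL_MAJOR_VERSION, &major);
        glGetIntegerv(GL_MINOR_VERSION, &minor);

        printf("GL_VERSION string: %s\n", (const char *)glGetString(GL_VERSION));
        printf("Reported version:  %d.%d\n", major, minor);
}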
......@@ -3073,6 +3073,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
LLVMValueRef indir_index;
LLVMValueRef ret;
unsigned const_index;
unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
instr->variables[0]->var->data.mode == nir_var_shader_in;
get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
......@@ -3098,13 +3099,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->abi->inputs + idx + chan, count,
4, false, true);
stride, false, true);
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else
values[chan] = ctx->abi->inputs[idx + chan + const_index * 4];
values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
}
break;
case nir_var_local:
......@@ -3115,13 +3116,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->locals + idx + chan, count,
4, true, true);
stride, true, true);
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else {
values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * 4], "");
values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
}
}
break;
......@@ -3143,14 +3144,14 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
count -= chan / 4;
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
&ctx->ac, ctx->outputs + idx + chan, count,
4, true, true);
stride, true, true);
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
tmp_vec,
indir_index, "");
} else {
values[chan] = LLVMBuildLoad(ctx->ac.builder,
ctx->outputs[idx + chan + const_index * 4],
ctx->outputs[idx + chan + const_index * stride],
"");
}
}
......@@ -5446,6 +5447,7 @@ setup_locals(struct ac_nir_context *ctx,
nir_foreach_variable(variable, &func->impl->locals) {
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
variable->data.driver_location = ctx->num_locals * 4;
variable->data.location_frac = 0;
ctx->num_locals += attrib_count;
}
ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
......
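The ac_nir_to_llvm.c hunks above replace the hard-coded slot stride of 4 with a per-variable stride so that compact arrays (e.g. gl_ClipDistance[], which pack one scalar per array element) are indexed correctly, and setup_locals() now zeroes location_frac for locals. A minimal sketch of the addressing rule, with a hypothetical helper name, just to spell out the arithmetic used above:

#include <stdbool.h>

/* Hypothetical helper, not Mesa API: index of channel `chan` of array
 * element `const_index` for a variable whose driver_location starts at
 * `idx`.  Regular variables occupy one vec4 slot per element (stride 4);
 * compact arrays pack one component per element (stride 1), which is
 * exactly what the new `stride` local selects above. */
static unsigned var_channel_index(unsigned idx, unsigned chan,
                                  unsigned const_index, bool compact)
{
        unsigned stride = compact ? 1 : 4;

        return idx + chan + const_index * stride;
}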
......@@ -1919,11 +1919,11 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
switch ((VkAccessFlagBits)(1 << b)) {
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
case VK_ACCESS_INDEX_READ_BIT:
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
break;
case VK_ACCESS_UNIFORM_READ_BIT:
flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
break;
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
case VK_ACCESS_SHADER_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
......@@ -3583,7 +3583,8 @@ void radv_CmdEndRenderPass(
/*
* For HTILE we have the following interesting clear words:
* 0x0000030f: Uncompressed.
 * 0x0000030f: Uncompressed for depth+stencil HTILE.
 * 0x0000000f: Uncompressed for depth-only HTILE.
* 0xfffffff0: Clear depth to 1.0
* 0x00000000: Clear depth to 0.0
*/
......@@ -3632,7 +3633,8 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
radv_initialize_htile(cmd_buffer, image, range, 0);
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
radv_initialize_htile(cmd_buffer, image, range, 0xffffffff);
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0x30f : 0xf;
radv_initialize_htile(cmd_buffer, image, range, clear_value);
} else if (radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
!radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
VkImageSubresourceRange local_range = *range;
......@@ -3834,7 +3836,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
si_cs_emit_write_event_eop(cs,
cmd_buffer->state.predicating,
cmd_buffer->device->physical_device->rad_info.chip_class,
false,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, va, 2, value);
......
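The HTILE comment and the layout-transition hunk above pick the "uncompressed" HTILE word based on whether the format carries stencil, and write_event() now passes radv_cmd_buffer_uses_mec() so events on compute queues take the MEC packet path. A standalone sketch of the clear-word selection, using only the values listed in the comment (vk_format_is_stencil() is the existing radv helper the hunk itself calls, declared in radv's vk_format.h):

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Sketch of the value used above when an image transitions into an
 * HTILE-compressed layout:
 *   0x0000030f - uncompressed, depth + stencil HTILE
 *   0x0000000f - uncompressed, depth-only HTILE
 * (0xfffffff0 / 0x00000000 are the "clear depth to 1.0 / 0.0" words.) */
static uint32_t htile_uncompressed_word(VkFormat format)
{
        return vk_format_is_stencil(format) ? 0x0000030f : 0x0000000f;
}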
......@@ -344,7 +344,7 @@ static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
}
}
static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
{
unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
......@@ -449,7 +449,7 @@ si_make_texture_descriptor(struct radv_device *device,
state[7] = 0;
if (device->physical_device->rad_info.chip_class >= GFX9) {
unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
/* Depth is the last accessible layer on Gfx9.
* The hw doesn't need to know the total number of layers.
......
......@@ -533,7 +533,7 @@ void radv_meta_build_resolve_shader_core(nir_builder *b,
nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
nir_builder_instr_insert(b, &tex_all_same->instr);
nir_ssa_def *all_same = nir_ine(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
nir_if *if_stmt = nir_if_create(b->shader);
if_stmt->condition = nir_src_for_ssa(all_same);
nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
......
......@@ -259,19 +259,20 @@ radv_device_finish_meta_itob_state(struct radv_device *device)
}
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev)
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
false,
false,
GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_cs");
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
......@@ -296,16 +297,16 @@ build_nir_btoi_compute_shader(struct radv_device *dev)
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(offset, 0);
nir_intrinsic_set_range(offset, 12);
nir_intrinsic_set_range(offset, 16);
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
offset->num_components = 2;
nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
offset->num_components = is_3d ? 3 : 2;
nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
nir_builder_instr_insert(&b, &offset->instr);
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(stride, 0);
nir_intrinsic_set_range(stride, 12);
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
nir_intrinsic_set_range(stride, 16);
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
stride->num_components = 1;
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
nir_builder_instr_insert(&b, &stride->instr);
......@@ -353,9 +354,10 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
cs.nir = build_nir_btoi_compute_shader(device);
struct radv_shader_module cs_3d = { .nir = NULL };
cs.nir = build_nir_btoi_compute_shader(device, false);
if (device->physical_device->rad_info.chip_class >= GFX9)
cs_3d.nir = build_nir_btoi_compute_shader(device, true);
/*
* two descriptors: one for the image being sampled,
* one for the buffer being written.
......@@ -395,7 +397,7 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
......@@ -429,9 +431,33 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail;
if (device->physical_device->rad_info.chip_class >= GFX9) {
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = radv_shader_module_to_handle(&cs_3d),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage_3d,
.flags = 0,
.layout = device->meta_state.btoi.img_p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info_3d, NULL,
&device->meta_state.btoi.pipeline_3d);
ralloc_free(cs_3d.nir);
}
ralloc_free(cs.nir);
return VK_SUCCESS;
fail:
ralloc_free(cs_3d.nir);
ralloc_free(cs.nir);
return result;
}
......@@ -448,22 +474,25 @@ radv_device_finish_meta_btoi_state(struct radv_device *device)
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->btoi.pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->btoi.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev)
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *buf_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_cs");
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
......@@ -488,18 +517,18 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(src_offset, 0);
nir_intrinsic_set_range(src_offset, 16);
nir_intrinsic_set_range(src_offset, 24);
src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
src_offset->num_components = 2;
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
src_offset->num_components = is_3d ? 3 : 2;
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
nir_builder_instr_insert(&b, &src_offset->instr);
nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(dst_offset, 0);
nir_intrinsic_set_range(dst_offset, 16);
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
dst_offset->num_components = 2;
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
nir_intrinsic_set_range(dst_offset, 24);
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
dst_offset->num_components = is_3d ? 3 : 2;
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
nir_builder_instr_insert(&b, &dst_offset->instr);
nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
......@@ -507,15 +536,15 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
tex->sampler_dim = dim;
tex->op = nir_texop_txf;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 3));
tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
tex->src[1].src_type = nir_tex_src_lod;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
tex->dest_type = nir_type_float;
tex->is_array = false;
tex->coord_components = 2;
tex->coord_components = is_3d ? 3 : 2;
tex->texture = nir_deref_var_create(tex, input_img);
tex->sampler = NULL;
......@@ -539,9 +568,10 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
cs.nir = build_nir_itoi_compute_shader(device);
struct radv_shader_module cs_3d = { .nir = NULL };
cs.nir = build_nir_itoi_compute_shader(device, false);
if (device->physical_device->rad_info.chip_class >= GFX9)
cs_3d.nir = build_nir_itoi_compute_shader(device, true);
/*
* two descriptors: one for the image being sampled,
* one for the buffer being written.
......@@ -581,7 +611,7 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
......@@ -615,10 +645,35 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail;
if (device->physical_device->rad_info.chip_class >= GFX9) {
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = radv_shader_module_to_handle(&cs_3d),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage_3d,
.flags = 0,
.layout = device->meta_state.itoi.img_p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info_3d, NULL,
&device->meta_state.itoi.pipeline_3d);
ralloc_free(cs_3d.nir);
}
ralloc_free(cs.nir);
return VK_SUCCESS;
fail:
ralloc_free(cs.nir);
ralloc_free(cs_3d.nir);
return result;
}
......@@ -634,18 +689,22 @@ radv_device_finish_meta_itoi_state(struct radv_device *device)
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->itoi.pipeline, &state->alloc);
if (device->physical_device->rad_info.chip_class >= GFX9)
radv_DestroyPipeline(radv_device_to_handle(device),
state->itoi.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev)
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
{
nir_builder b;
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
const struct glsl_type *img_type = glsl_sampler_type(dim,
false,
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_cs");
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
......@@ -666,12 +725,29 @@ build_nir_cleari_compute_shader(struct radv_device *dev)
nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(clear_val, 0);
nir_intrinsic_set_range(clear_val, 16);
nir_intrinsic_set_range(clear_val, 20);
clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
clear_val->num_components = 4;
nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
nir_builder_instr_insert(&b, &clear_val->instr);
nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(layer, 0);
nir_intrinsic_set_range(layer, 20);
layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
layer->num_components = 1;
nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
nir_builder_instr_insert(&b, &layer->instr);
nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
nir_ssa_def *comps[4];
comps[0] = nir_channel(&b, global_id, 0);
comps[1] = nir_channel(&b, global_id, 1);
comps[2] = global_z;
comps[3] = nir_imm_int(&b, 0);
global_id = nir_vec(&b, comps, 4);
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
store->src[0] = nir_src_for_ssa(global_id);
store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
......@@ -687,8 +763,10 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
cs.nir = build_nir_cleari_compute_shader(device);
struct radv_shader_module cs_3d = { .nir = NULL };
cs.nir = build_nir_cleari_compute_shader(device, false);
if (device->physical_device->rad_info.chip_class >= GFX9)
cs_3d.nir = build_nir_cleari_compute_shader(device, true);
/*
* two descriptors one for the image being sampled
......@@ -722,7 +800,7 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
......@@ -756,10 +834,38 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail;
if (device->physical_device->rad_info.chip_class >= GFX9) {
/* compute shader */
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = radv_shader_module_to_handle(&cs_3d),
.pName = "main",
.pSpecializationInfo = NULL,
};
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = pipeline_shader_stage_3d,
.flags = 0,
.layout = device->meta_state.cleari.img_p_layout,
};
result = radv_CreateComputePipelines(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &vk_pipeline_info_3d, NULL,
&device->meta_state.cleari.pipeline_3d);
if (result != VK_SUCCESS)
goto fail;
ralloc_free(cs_3d.nir);
}
ralloc_free(cs.nir);
return VK_SUCCESS;
fail:
ralloc_free(cs.nir);
ralloc_free(cs_3d.nir);
return result;
}
......@@ -775,6 +881,8 @@ radv_device_finish_meta_cleari_state(struct radv_device *device)
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->cleari.pipeline, &state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->cleari.pipeline_3d, &state->alloc);
}
void
......@@ -990,18 +1098,22 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
create_iview(cmd_buffer, dst, &dst_view);
btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
if (device->physical_device->rad_info.chip_class >= GFX9 &&
dst->image->type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[3] = {
unsigned push_constants[4] = {
rects[r].dst_x,
rects[r].dst_y,
src->pitch
dst->layer,
src->pitch,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.btoi.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
......@@ -1068,19 +1180,24 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
if (device->physical_device->rad_info.chip_class >= GFX9 &&
src->image->type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
for (unsigned r = 0; r < num_rects; ++r) {
unsigned push_constants[4] = {
unsigned push_constants[6] = {
rects[r].src_x,
rects[r].src_y,
src->layer,
rects[r].dst_x,
rects[r].dst_y,
dst->layer,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.itoi.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
push_constants);
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
......@@ -1128,19 +1245,24 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
create_iview(cmd_buffer, dst, &dst_iview);
cleari_bind_descriptors(cmd_buffer, &dst_iview);
if (device->physical_device->rad_info.chip_class >= GFX9 &&
dst->image->type == VK_IMAGE_TYPE_3D)
pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
unsigned push_constants[4] = {
unsigned push_constants[5] = {
clear_color->uint32[0],
clear_color->uint32[1],
clear_color->uint32[2],
clear_color->uint32[3],
dst->layer,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.cleari.img_p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
push_constants);
radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
......
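The radv_meta_bufimage.c changes above widen each meta pipeline's push-constant range (btoi 12→16 bytes, itoi 16→24, cleari 16→20) so a source/destination layer can reach the new GFX9 3D shader variants. A sketch of the resulting layouts, reconstructed from the push_constants arrays above; the struct names are hypothetical and only document the byte offsets:

#include <stdint.h>

/* Hypothetical structs mirroring the push_constants arrays above
 * (offsets in bytes in the comments). */
struct btoi_push {               /* now 16 bytes, was 12 */
        uint32_t dst_x;          /*  0 */
        uint32_t dst_y;          /*  4 */
        uint32_t dst_layer;      /*  8 - new: destination z/layer */
        uint32_t pitch;          /* 12 - buffer pitch, previously at 8 */
};

struct itoi_push {               /* now 24 bytes, was 16 */
        uint32_t src_x;          /*  0 */
        uint32_t src_y;          /*  4 */
        uint32_t src_layer;      /*  8 - new */
        uint32_t dst_x;          /* 12 */
        uint32_t dst_y;          /* 16 */
        uint32_t dst_layer;      /* 20 - new */
};

struct cleari_push {             /* now 20 bytes, was 16 */
        uint32_t clear_color[4]; /*  0..15 */
        uint32_t dst_layer;      /* 16 - new: added to global_id.z */
};

Only the is_3d shader variants read the extra component, which is why the load_push_constant ranges in the builders above grow by the same amounts.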
......@@ -300,10 +300,9 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image,
enum radv_resolve_method *method)
{
if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
if (dest_image->surface.num_dcc_levels > 0)
if (dest_image->surface.num_dcc_levels > 0) {
*method = RESOLVE_FRAGMENT;
else
} else if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
*method = RESOLVE_COMPUTE;
}
}
......
......@@ -487,6 +487,14 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
if (!subpass->has_resolve)
return;
/* Resolves happen before the end-of-subpass barriers get executed,
* so we have to make the attachment shader-readable */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
RADV_CMD_FLAG_INV_GLOBAL_L2 |
RADV_CMD_FLAG_INV_VMEM_L1;
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
......
......@@ -407,8 +407,8 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
unsigned push_constants[2] = {
src_offset->x,
src_offset->y,
src_offset->x - dest_offset->x,
src_offset->y - dest_offset->y,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.resolve_fragment.p_layout,
......@@ -540,8 +540,8 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
.pAttachments = (VkImageView[]) {
radv_image_view_to_handle(&dest_iview),
},
.width = extent.width,
.height = extent.height,
.width = extent.width + dstOffset.x,
.height = extent.height + dstOffset.y,
.layers = 1
}, &cmd_buffer->pool->alloc, &fb);
......@@ -604,6 +604,16 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
RADV_META_SAVE_CONSTANTS |
RADV_META_SAVE_DESCRIPTORS);
/* Resolves happen before the end-of-subpass barriers get executed,
* so we have to make the attachment shader-readable */
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
RADV_CMD_FLAG_FLUSH_AND_INV_DB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
RADV_CMD_FLAG_INV_GLOBAL_L2 |
RADV_CMD_FLAG_INV_VMEM_L1;
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
......
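The resolve_fragment.c hunks above change the pushed offset from the absolute source offset to the source-minus-destination delta and enlarge the framebuffer to extent + dstOffset, so resolves into a non-zero destination offset land correctly. A small sketch of the coordinate math, with hypothetical names, mirroring what emit_resolve() now pushes:

#include <stdint.h>

struct off2d { int32_t x, y; };

/* Sketch only: the resolve fragment shader runs over the destination
 * rectangle, so the source texel for a fragment is its position plus
 * the (src - dst) delta that is now passed as push constants above. */
static struct off2d resolve_src_coord(struct off2d frag_pos,
                                      struct off2d src_offset,
                                      struct off2d dst_offset)
{
        struct off2d delta = {
                src_offset.x - dst_offset.x,    /* pushed as constant 0 */
                src_offset.y - dst_offset.y,    /* pushed as constant 1 */
        };

        return (struct off2d){ frag_pos.x + delta.x, frag_pos.y + delta.y };
}

The framebuffer, in turn, has to cover fragments up to dstOffset + extent, which is why its width and height are enlarged in the hunk above.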
......@@ -433,16 +433,19 @@ struct radv_meta_state {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
VkPipeline pipeline_3d;
} btoi;
struct {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
VkPipeline pipeline_3d;
} itoi;
struct {
VkPipelineLayout img_p_layout;
VkDescriptorSetLayout img_ds_layout;
VkPipeline pipeline;
VkPipeline pipeline_3d;
} cleari;
struct {
......
......@@ -1152,7 +1152,7 @@ void radv_CmdEndQuery(
si_cs_emit_write_event_eop(cs,
false,
cmd_buffer->device->physical_device->rad_info.chip_class,
false,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
break;
......
......@@ -676,7 +676,8 @@ si_write_scissors(struct radeon_winsys_cs *cs, int first,
int i;
float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
const float max_range = 32767.0f;
assert(count);
if (!count)
return;
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
for (i = 0; i < count; i++) {
......@@ -988,6 +989,11 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
if (chip_class >= GFX9 && flush_cb_db) {
unsigned cb_db_event, tc_flags;
#if 0
/* This breaks a bunch of:
dEQP-VK.renderpass.dedicated_allocation.formats.d32_sfloat_s8_uint.input*.
use the big hammer always.
*/
/* Set the CB/DB flush event. */
switch (flush_cb_db) {
case RADV_CMD_FLAG_FLUSH_AND_INV_CB:
......@@ -1000,7 +1006,9 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
/* both CB & DB */
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
}
#else
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
#endif
/* TC | TC_WB = invalidate L2 data
* TC_MD | TC_WB = invalidate L2 metadata
* TC | TC_WB | TC_MD = invalidate L2 data & metadata
......
......@@ -72,15 +72,21 @@ lower_buffer_access::emit_access(void *mem_ctx,
new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
field->name);
field_offset =
glsl_align(field_offset,
field->type->std140_base_alignment(row_major));
unsigned field_align;
if (packing == GLSL_INTERFACE_PACKING_STD430)
field_align = field->type->std430_base_alignment(row_major);
else
field_align = field->type->std140_base_alignment(row_major);
field_offset = glsl_align(field_offset, field_align);
emit_access(mem_ctx, is_write, field_deref, base_offset,
deref_offset + field_offset,
row_major, 1, packing,
writemask_for_size(field_deref->type->vector_elements));
if (packing == GLSL_INTERFACE_PACKING_STD430)
field_offset += field->type->std430_size(row_major);
else
field_offset += field->type->std140_size(row_major);
}
return;
......
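The lower_buffer_access hunk above (for Bug 104492) makes struct-member lowering use std430 base alignment and size when the interface packing is std430, instead of always applying the std140 rules. A worked example of why the two differ, following the standard GLSL layout rules; shown as a comment block since the difference is purely arithmetic:

/* Worked example (GLSL std140 vs std430 layout rules):
 *
 *     struct S { float a; float b[2]; float c; };
 *
 *                          std140                    std430
 *   a   offset             0                         0
 *   b   array stride       16 (rounded up to vec4)   4 (natural alignment)
 *   b   offset             16                        4
 *   c   offset             16 + 2*16 = 48            4 + 2*4 = 12
 *
 * Before this fix, field offsets inside an std430 SSBO were still
 * accumulated with std140 alignment and size, so accesses to `c`
 * would be lowered to offset 48 instead of 12. */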