Skip to content
Commits on Source (39)
......@@ -14,3 +14,19 @@ a26693493570a9d0f0fba1be617e01ee7bfff4db
# 18.1
#
a1220e73116bad74f39c1792a0b0cf0e4e5031db
# This doesn't apply and isn't necessary since
# 1cc2e0cc6b47bd5efbf2af266405060785085e6b isn't in the 18.1 branch
#
587e712eda95c31d88ea9d20e59ad0ae59afef4f
# This requires too many previous patch, and Marek (the author) decided to
# to drop it from stable
#
cac7ab1192eefdd8d8b3f25053fb006b5c330eb8
# This patch is excluded since it requires additional patches to be pulled,
# and is mainly aimed at developers, who rarely (if ever) work in the
# stable branch
#
a2f5292c82ad07731d633b36a663e46adc181db9
......@@ -2219,13 +2219,13 @@ else
have_vdpau_platform=no
fi
if echo $platforms | grep -q "x11\|drm"; then
if echo $platforms | egrep -q "x11|drm"; then
have_omx_platform=yes
else
have_omx_platform=no
fi
if echo $platforms | grep -q "x11\|drm\|wayland"; then
if echo $platforms | egrep -q "x11|drm|wayland"; then
have_va_platform=yes
else
have_va_platform=no
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 18.1.2 Release Notes / June 15 2018</h1>
<p>
Mesa 18.1.2 is a bug fix release which fixes bugs found since the 18.1.1 release.
</p>
<p>
Mesa 18.1.2 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
</p>
<h2>SHA256 checksums</h2>
<pre>
a644df23937f4078a2bd9a54349f6315c1955f5e3a4ac272832da51dea4d3c11 mesa-18.1.1.tar.gz
070bf0648ba5b242d7303ceed32aed80842f4c0ba16e5acc1a650a46eadfb1f9 mesa-18.1.1.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<p>None<p>
<h2>Changes</h2>
<p>Alex Smith (4):</p>
<ul>
<li>radv: Consolidate GFX9 merged shader lookup logic</li>
<li>radv: Handle GFX9 merged shaders in radv_flush_constants()</li>
<li>radeonsi: Fix crash on shaders using MSAA image load/store</li>
<li>radv: Set active_stages the same whether or not shaders were cached</li>
</ul>
<p>Andrew Galante (2):</p>
<ul>
<li>meson: Test for __atomic_add_fetch in atomic checks</li>
<li>configure.ac: Test for __atomic_add_fetch in atomic checks</li>
</ul>
<p>Bas Nieuwenhuizen (1):</p>
<ul>
<li>radv: Don't pass a TESS_EVAL shader when tesselation is not enabled.</li>
</ul>
<p>Cameron Kumar (1):</p>
<ul>
<li>vulkan/wsi: Destroy swapchain images after terminating FIFO queues</li>
</ul>
<p>Dylan Baker (6):</p>
<ul>
<li>docs/relnotes: Add sha256 sums for mesa 18.1.1</li>
<li>cherry-ignore: add commits not to pull</li>
<li>cherry-ignore: Add patches from Jason that he rebased on 18.1</li>
<li>meson: work around gentoo applying -m32 to host compiler in cross builds</li>
<li>cherry-ignore: Add another patch</li>
<li>version: bump version for 18.1.2 release</li>
</ul>
<p>Eric Engestrom (3):</p>
<ul>
<li>autotools: add missing android file to package</li>
<li>configure: radv depends on mako</li>
<li>i965: fix resource leak</li>
</ul>
<p>Jason Ekstrand (10):</p>
<ul>
<li>intel/eu: Add some brw_get_default_ helpers</li>
<li>intel/eu: Copy fields manually in brw_next_insn</li>
<li>intel/eu: Set flag [sub]register number differently for 3src</li>
<li>intel/blorp: Don't vertex fetch directly from clear values</li>
<li>intel/isl: Add bounds-checking assertions in isl_format_get_layout</li>
<li>intel/isl: Add bounds-checking assertions for the format_info table</li>
<li>i965/screen: Refactor query_dma_buf_formats</li>
<li>i965/screen: Use RGBA non-sRGB formats for images</li>
<li>anv: Set fence/semaphore types to NONE in impl_cleanup</li>
<li>i965/screen: Return false for unsupported formats in query_modifiers</li>
</ul>
<p>Jordan Justen (1):</p>
<ul>
<li>mesa/program_binary: add implicit UseProgram after successful ProgramBinary</li>
</ul>
<p>Juan A. Suarez Romero (1):</p>
<ul>
<li>glsl: Add ir_binop_vector_extract in NIR</li>
</ul>
<p>Kenneth Graunke (2):</p>
<ul>
<li>i965: Fix batch-last mode to properly swap BOs.</li>
<li>anv: Disable __gen_validate_value if NDEBUG is set.</li>
</ul>
<p>Marek Olšák (1):</p>
<ul>
<li>r300g/swtcl: make pipe_context uploaders use malloc'd memory as before</li>
</ul>
<p>Matt Turner (1):</p>
<ul>
<li>meson: Fix -latomic check</li>
</ul>
<p>Michel Dänzer (1):</p>
<ul>
<li>glx: Fix number of property values to read in glXImportContextEXT</li>
</ul>
<p>Nicolas Boichat (1):</p>
<ul>
<li>configure.ac/meson.build: Fix -latomic test</li>
</ul>
<p>Philip Rebohle (1):</p>
<ul>
<li>radv: Use correct color format for fast clears</li>
</ul>
<p>Samuel Pitoiset (3):</p>
<ul>
<li>radv: fix a GPU hang when MRTs are sparse</li>
<li>radv: fix missing ZRANGE_PRECISION(1) for GFX9+</li>
<li>radv: add a workaround for DXVK hangs by setting amdgpu-skip-threshold</li>
</ul>
<p>Scott D Phillips (1):</p>
<ul>
<li>intel/tools: add intel_sanitize_gpu to EXTRA_DIST</li>
</ul>
<p>Thomas Petazzoni (1):</p>
<ul>
<li>configure.ac: rework -latomic check</li>
</ul>
<p>Timothy Arceri (2):</p>
<ul>
<li>ac: fix possible truncation of intrinsic name</li>
<li>radeonsi: fix possible truncation on renderer string</li>
</ul>
</div>
</body>
</html>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 18.1.3 Release Notes / June 29 2018</h1>
<p>
Mesa 18.1.3 is a bug fix release which fixes bugs found since the 18.1.2 release.
</p>
<p>
Mesa 18.1.2 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD mesa-18.1.3.tar.gz
TBD mesa-18.1.3.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105396">Bug 105396</a> - tc compatible htile sets depth of htiles of discarded fragments to 1.0</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105399">Bug 105399</a> - [snb] GPU hang: after geometry shader emits no geometry, the program hangs</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106756">Bug 106756</a> - Wine 3.9 crashes with DXVK on Just Cause 3 and Quantum Break on VEGA but works ON POLARIS</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106774">Bug 106774</a> - GLSL IR copy propagates loads of SSBOs</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106903">Bug 106903</a> - radv: Fragment shader output goes to wrong attachments when render targets are sparse</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106907">Bug 106907</a> - Correct Transform Feedback Varyings information is expected after using ProgramBinary</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106912">Bug 106912</a> - radv: 16-bit depth buffer causes artifacts in Shadow Warrior 2</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106980">Bug 106980</a> - Basemark GPU vulkan benchmark fails.</li>
</ul>
<h2>Changes</h2>
<p>Andrii Simiklit (1):</p>
<ul>
<li>i965/gen6/gs: Handle case where a GS doesn't allocate VUE</li>
</ul>
<p>Bas Nieuwenhuizen (2):</p>
<ul>
<li>radv: Fix output for sparse MRTs.</li>
<li>ac/surface: Set compressZ for stencil-only surfaces.</li>
</ul>
<p>Christian Gmeiner (1):</p>
<ul>
<li>util/bitset: include util/macro.h</li>
</ul>
<p>Dave Airlie (1):</p>
<ul>
<li>glsl: allow standalone semicolons outside main()</li>
</ul>
<p>Dylan Baker (8):</p>
<ul>
<li>docs: Add release notes for 18.1.2</li>
<li>cherry-ignore: Add 587e712eda95c31d88ea9d20e59ad0ae59afef4f</li>
<li>meson: Fix auto option for va</li>
<li>meson: Fix auto option for xvmc</li>
<li>meson: Correct behavior of vdpau=auto</li>
<li>cherry-ignore: Ignore cac7ab1192eefdd8d8b3f25053fb006b5c330eb8</li>
<li>cherry-ignore: add a2f5292c82ad07731d633b36a663e46adc181db9</li>
<li>VERSION: bump version to 18.1.3</li>
</ul>
<p>Emil Velikov (2):</p>
<ul>
<li>configure: use compliant grep regex checks</li>
<li>glsl/tests/glcpp: reinstate "error out if no tests found"</li>
</ul>
<p>Eric Engestrom (3):</p>
<ul>
<li>radv: fix reported number of available VGPRs</li>
<li>radv: fix bitwise check</li>
<li>meson: fix i965/anv/isl genX static lib names</li>
</ul>
<p>Ian Romanick (2):</p>
<ul>
<li>glsl: Don't copy propagate from SSBO or shared variables either</li>
<li>glsl: Don't copy propagate elements from SSBO or shared variables either</li>
</ul>
<p>Jason Ekstrand (2):</p>
<ul>
<li>nir: Handle call instructions in foreach_src</li>
<li>nir/validate: Use the type from the tail of call parameter derefs</li>
</ul>
<p>Lukas Rusak (2):</p>
<ul>
<li>meson: only build vl_winsys_dri.c when x11 platform is used</li>
<li>meson: fix private libs when building without glx</li>
</ul>
<p>Marek Olšák (5):</p>
<ul>
<li>radeonsi/gfx9: fix si_get_buffer_from_descriptors for 48-bit pointers</li>
<li>ac/gpu_info: report real total memory sizes</li>
<li>ac/gpu_info: add kernel_flushes_hdp_before_ib</li>
<li>radeonsi: always put persistent buffers into GTT on radeon</li>
<li>mesa: fix glGetInteger64v for arrays of integers</li>
</ul>
<p>Rob Clark (1):</p>
<ul>
<li>freedreno/ir3: fix base_vertex</li>
</ul>
<p>Samuel Pitoiset (6):</p>
<ul>
<li>radv: don't fast clear HTILE for 16-bit depth surfaces on GFX8</li>
<li>radv: update the ZRANGE_PRECISION value for the TC-compat bug</li>
<li>radv: fix emitting the TCS regs on GFX9</li>
<li>radv: fix HTILE metadata initialization in presence of subpass clears</li>
<li>radv: ignore pInheritanceInfo for primary command buffers</li>
<li>radv: use separate bind points for the dynamic buffers</li>
</ul>
<p>Tapani Pälli (1):</p>
<ul>
<li>glsl: serialize data from glTransformFeedbackVaryings</li>
</ul>
<p>Tomeu Vizoso (1):</p>
<ul>
<li>virgl: Remove debugging left-overs</li>
</ul>
</div>
</body>
</html>
......@@ -420,16 +420,17 @@ elif not (with_gallium_r300 or with_gallium_r600 or with_gallium_radeonsi or
else
_vdpau = 'false'
endif
elif _vdpau == 'auto'
_vdpau = 'true'
endif
with_gallium_vdpau = _vdpau == 'true'
dep_vdpau = null_dep
if with_gallium_vdpau
dep_vdpau = dependency('vdpau', version : '>= 1.1')
with_gallium_vdpau = false
if _vdpau != 'false'
dep_vdpau = dependency('vdpau', version : '>= 1.1', required : _vdpau == 'true')
if dep_vdpau.found()
dep_vdpau = declare_dependency(
compile_args : run_command(prog_pkgconfig, ['vdpau', '--cflags']).stdout().split()
)
with_gallium_vdpau = true
endif
endif
if with_gallium_vdpau
......@@ -459,13 +460,12 @@ elif not (with_gallium_r600 or with_gallium_nouveau)
else
_xvmc = 'false'
endif
elif _xvmc == 'auto'
_xvmc = 'true'
endif
with_gallium_xvmc = _xvmc == 'true'
dep_xvmc = null_dep
if with_gallium_xvmc
dep_xvmc = dependency('xvmc', version : '>= 1.0.6')
with_gallium_xvmc = false
if _xvmc != 'false'
dep_xvmc = dependency('xvmc', version : '>= 1.0.6', required : _xvmc == 'true')
with_gallium_xvmc = dep_xvmc.found()
endif
xvmc_drivers_path = get_option('xvmc-libs-path')
......@@ -581,13 +581,16 @@ elif not (with_gallium_r600 or with_gallium_radeonsi or with_gallium_nouveau)
elif _va == 'auto'
_va = 'true'
endif
with_gallium_va = _va == 'true'
with_gallium_va = false
dep_va = null_dep
if with_gallium_va
dep_va = dependency('libva', version : '>= 0.39.0')
if _va != 'false'
dep_va = dependency('libva', version : '>= 0.38.0', required : _va == 'true')
if dep_va.found()
dep_va_headers = declare_dependency(
compile_args : run_command(prog_pkgconfig, ['libva', '--cflags']).stdout().split()
)
with_gallium_va = true
endif
endif
va_drivers_path = get_option('va-libs-path')
......@@ -1340,18 +1343,24 @@ endforeach
inc_include = include_directories('include')
gl_priv_reqs = [
gl_priv_reqs = []
if with_glx == 'xlib' or with_glx == 'gallium-xlib'
gl_priv_reqs += ['x11', 'xext', 'xcb']
elif with_glx == 'dri'
gl_priv_reqs += [
'x11', 'xext', 'xdamage >= 1.1', 'xfixes', 'x11-xcb', 'xcb',
'xcb-glx >= 1.8.1']
if with_dri_platform == 'drm'
gl_priv_reqs += 'xcb-dri2 >= 1.8'
endif
endif
if dep_libdrm.found()
gl_priv_reqs += 'libdrm >= 2.4.75'
endif
if dep_xxf86vm.found()
gl_priv_reqs += 'xxf86vm'
endif
if with_dri_platform == 'drm'
gl_priv_reqs += 'xcb-dri2 >= 1.8'
endif
gl_priv_libs = []
if dep_thread.found()
......
......@@ -97,7 +97,6 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
struct amdgpu_gpu_info *amdinfo)
{
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {};
struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {};
struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {};
......@@ -131,26 +130,6 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
return false;
}
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
return false;
}
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
&vram_vis);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
return false;
}
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
return false;
}
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_DMA, 0, &dma);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
......@@ -255,6 +234,60 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
return false;
}
if (info->drm_minor >= 9) {
struct drm_amdgpu_memory_info meminfo;
r = amdgpu_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), &meminfo);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_info(memory) failed.\n");
return false;
}
/* Note: usable_heap_size values can be random and can't be relied on. */
info->gart_size = meminfo.gtt.total_heap_size;
info->vram_size = meminfo.vram.total_heap_size;
info->vram_vis_size = meminfo.cpu_accessible_vram.total_heap_size;
info->max_alloc_size = MAX2(meminfo.vram.max_allocation,
meminfo.gtt.max_allocation);
} else {
/* This is a deprecated interface, which reports usable sizes
* (total minus pinned), but the pinned size computation is
* buggy, so the values returned from these functions can be
* random.
*/
struct amdgpu_heap_info vram, vram_vis, gtt;
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
return false;
}
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
&vram_vis);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
return false;
}
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
return false;
}
info->gart_size = gtt.heap_size;
info->vram_size = vram.heap_size;
info->vram_vis_size = vram_vis.heap_size;
/* The kernel can split large buffers in VRAM but not in GTT, so large
* allocations can fail or cause buffer movement failures in the kernel.
*/
info->max_alloc_size = MAX2(info->vram_size * 0.9, info->gart_size * 0.7);
}
/* Set chip identification. */
info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
info->vce_harvest_config = amdinfo->vce_harvest_config;
......@@ -287,15 +320,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
!(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
/* Set hardware information. */
info->gart_size = gtt.heap_size;
info->vram_size = vram.heap_size;
info->vram_vis_size = vram_vis.heap_size;
info->gds_size = gds.gds_total_size;
info->gds_gfx_partition_size = gds.gds_gfx_partition_size;
/* The kernel can split large buffers in VRAM but not in GTT, so large
* allocations can fail or cause buffer movement failures in the kernel.
*/
info->max_alloc_size = MIN2(info->vram_size * 0.9, info->gart_size * 0.7);
/* convert the shader clock from KHz to MHz */
info->max_shader_clock = amdinfo->max_engine_clk / 1000;
info->max_se = amdinfo->num_shader_engines;
......@@ -316,6 +342,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
/* TODO: Enable this once the kernel handles it efficiently. */
info->has_local_buffers = info->drm_minor >= 20 &&
!info->has_dedicated_vram;
info->kernel_flushes_hdp_before_ib = true;
info->num_render_backends = amdinfo->rb_pipes;
info->clock_crystal_freq = amdinfo->gpu_counter_freq;
if (!info->clock_crystal_freq) {
......@@ -458,6 +486,7 @@ void ac_print_gpu_info(struct radeon_info *info)
printf(" has_fence_to_handle = %u\n", info->has_fence_to_handle);
printf(" has_ctx_priority = %u\n", info->has_ctx_priority);
printf(" has_local_buffers = %u\n", info->has_local_buffers);
printf(" kernel_flushes_hdp_before_ib = %u\n", info->kernel_flushes_hdp_before_ib);
printf("Shader core info:\n");
printf(" max_shader_clock = %i\n", info->max_shader_clock);
......
......@@ -96,6 +96,7 @@ struct radeon_info {
bool has_fence_to_handle;
bool has_ctx_priority;
bool has_local_buffers;
bool kernel_flushes_hdp_before_ib;
/* Shader cores. */
uint32_t r600_max_quad_pipes; /* wave size / 16 */
......
......@@ -624,7 +624,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
config->info.levels == 1);
AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth;
AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
/* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit)
* for Z and stencil. This can cause a number of problems which we work
......
......@@ -1008,6 +1008,68 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
}
}
static void
radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
struct radv_ds_buffer_info *ds,
struct radv_image *image, VkImageLayout layout,
bool requires_cond_write)
{
uint32_t db_z_info = ds->db_z_info;
uint32_t db_z_info_reg;
if (!radv_image_is_tc_compat_htile(image))
return;
if (!radv_layout_has_htile(image, layout,
radv_image_queue_family_mask(image,
cmd_buffer->queue_family_index,
cmd_buffer->queue_family_index))) {
db_z_info &= C_028040_TILE_SURFACE_ENABLE;
}
db_z_info &= C_028040_ZRANGE_PRECISION;
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
db_z_info_reg = R_028038_DB_Z_INFO;
} else {
db_z_info_reg = R_028040_DB_Z_INFO;
}
/* When we don't know the last fast clear value we need to emit a
* conditional packet, otherwise we can update DB_Z_INFO directly.
*/
if (requires_cond_write) {
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
const uint32_t write_space = 0 << 8; /* register */
const uint32_t poll_space = 1 << 4; /* memory */
const uint32_t function = 3 << 0; /* equal to the reference */
const uint32_t options = write_space | poll_space | function;
radeon_emit(cmd_buffer->cs, options);
/* poll address - location of the depth clear value */
uint64_t va = radv_buffer_get_va(image->bo);
va += image->offset + image->clear_value_offset;
/* In presence of stencil format, we have to adjust the base
* address because the first value is the stencil clear value.
*/
if (vk_format_is_stencil(image->vk_format))
va += 4;
radeon_emit(cmd_buffer->cs, va);
radeon_emit(cmd_buffer->cs, va >> 32);
radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */
radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */
radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
radeon_emit(cmd_buffer->cs, 0u); /* write address high */
radeon_emit(cmd_buffer->cs, db_z_info);
} else {
radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
}
}
static void
radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
struct radv_ds_buffer_info *ds,
......@@ -1066,6 +1128,9 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
}
/* Update the ZRANGE_PRECISION value for the TC-compat bug. */
radv_update_zrange_precision(cmd_buffer, ds, image, layout, true);
radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
ds->pa_su_poly_offset_db_fmt_cntl);
}
......@@ -1107,6 +1172,35 @@ radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cmd_buffer->cs, ds_clear_value.stencil); /* R_028028_DB_STENCIL_CLEAR */
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
radeon_emit(cmd_buffer->cs, fui(ds_clear_value.depth)); /* R_02802C_DB_DEPTH_CLEAR */
/* Update the ZRANGE_PRECISION value for the TC-compat bug. This is
* only needed when clearing Z to 0.0.
*/
if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
ds_clear_value.depth == 0.0) {
struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
if (!framebuffer || !subpass)
return;
if (subpass->depth_stencil_attachment.attachment == VK_ATTACHMENT_UNUSED)
return;
int idx = subpass->depth_stencil_attachment.attachment;
VkImageLayout layout = subpass->depth_stencil_attachment.layout;
struct radv_attachment_info *att = &framebuffer->attachments[idx];
struct radv_image *image = att->attachment->image;
/* Only needed if the image is currently bound as the depth
* surface.
*/
if (att->attachment->image != image)
return;
radv_update_zrange_precision(cmd_buffer, &att->ds, image,
layout, false);
}
}
static void
......@@ -1576,6 +1670,11 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT
? cmd_buffer->state.compute_pipeline
: cmd_buffer->state.pipeline;
VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ?
VK_PIPELINE_BIND_POINT_COMPUTE :
VK_PIPELINE_BIND_POINT_GRAPHICS;
struct radv_descriptor_state *descriptors_state =
radv_get_descriptors_state(cmd_buffer, bind_point);
struct radv_pipeline_layout *layout = pipeline->layout;
struct radv_shader_variant *shader, *prev_shader;
unsigned offset;
......@@ -1593,7 +1692,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
return;
memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers,
memcpy((char*)ptr + layout->push_constant_size,
descriptors_state->dynamic_buffers,
16 * layout->dynamic_offset_count);
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
......@@ -2097,7 +2197,8 @@ VkResult radv_BeginCommandBuffer(
}
}
if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
assert(pBeginInfo->pInheritanceInfo);
cmd_buffer->state.framebuffer = radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
cmd_buffer->state.pass = radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
......@@ -2226,6 +2327,8 @@ void radv_CmdBindDescriptorSets(
unsigned dyn_idx = 0;
const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
struct radv_descriptor_state *descriptors_state =
radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
for (unsigned i = 0; i < descriptorSetCount; ++i) {
unsigned idx = i + firstSet;
......@@ -2234,7 +2337,7 @@ void radv_CmdBindDescriptorSets(
for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
uint32_t *dst = cmd_buffer->dynamic_buffers + idx * 4;
uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4;
assert(dyn_idx < dynamicOffsetCount);
struct radv_descriptor_range *range = set->dynamic_descriptors + j;
......@@ -3706,6 +3809,20 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
size, clear_word);
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
/* Initialize the depth clear registers and update the ZRANGE_PRECISION
* value for the TC-compat bug (because ZRANGE_PRECISION is 1 by
* default). This is only needed whean clearing Z to 0.0f.
*/
if (radv_image_is_tc_compat_htile(image) && clear_word == 0) {
VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
VkClearDepthStencilValue value = {};
if (vk_format_is_stencil(image->vk_format))
aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
radv_set_depth_clear_regs(cmd_buffer, image, value, aspects);
}
}
static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
......@@ -3720,14 +3837,7 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
if (!radv_image_has_htile(image))
return;
if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
(pending_clears & vk_format_aspects(image->vk_format)) == vk_format_aspects(image->vk_format) &&
cmd_buffer->state.render_area.offset.x == 0 && cmd_buffer->state.render_area.offset.y == 0 &&
cmd_buffer->state.render_area.extent.width == image->info.width &&
cmd_buffer->state.render_area.extent.height == image->info.height) {
/* The clear will initialize htile. */
return;
} else if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
/* TODO: merge with the clear if applicable */
radv_initialize_htile(cmd_buffer, image, range, 0);
......
......@@ -717,6 +717,14 @@ emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer,
if ((clear_value.depth != 0.0 && clear_value.depth != 1.0) || !(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
goto fail;
/* GFX8 only supports 32-bit depth surfaces but we can enable TC-compat
* HTILE for 16-bit surfaces if no Z planes are compressed. Though,
* fast HTILE clears don't seem to work.
*/
if (cmd_buffer->device->physical_device->rad_info.chip_class == VI &&
iview->image->vk_format == VK_FORMAT_D16_UNORM)
goto fail;
if (vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) {
if (clear_value.stencil != 0 || !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT))
goto fail;
......
......@@ -3051,7 +3051,6 @@ static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
ctx->ac.i32_0, "");
ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, "");
ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, "");
ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, "");
ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, "");
}
......
......@@ -524,20 +524,21 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
col_format |= cf << (4 * i);
}
blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);
if (blend->mrt0_is_dual_src)
col_format |= (col_format & 0xf) << 4;
blend->spi_shader_col_format = col_format;
/* If the i-th target format is set, all previous target formats must
* be non-zero to avoid hangs.
*/
num_targets = (util_last_bit(blend->spi_shader_col_format) + 3) / 4;
num_targets = (util_last_bit(col_format) + 3) / 4;
for (unsigned i = 0; i < num_targets; i++) {
if (!(blend->spi_shader_col_format & (0xf << (i * 4))))
blend->spi_shader_col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
if (!(col_format & (0xf << (i * 4)))) {
col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
}
}
blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);
if (blend->mrt0_is_dual_src)
col_format |= (col_format & 0xf) << 4;
blend->spi_shader_col_format = col_format;
}
static bool
......@@ -621,7 +622,7 @@ radv_blend_check_commutativity(struct radv_blend_state *blend,
(1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA);
if (dst == VK_BLEND_FACTOR_ONE &&
(src_allowed && (1u << src))) {
(src_allowed & (1u << src))) {
/* Addition is commutative, but floating point addition isn't
* associative: subtle changes can be introduced via different
* rounding. Be conservative, only enable for min and max.
......
......@@ -928,6 +928,7 @@ struct radv_descriptor_state {
uint32_t valid;
struct radv_push_descriptor_set push_set;
bool push_dirty;
uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
};
struct radv_cmd_state {
......@@ -1013,7 +1014,6 @@ struct radv_cmd_buffer {
uint32_t queue_family_index;
uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
VkShaderStageFlags push_constant_stages;
struct radv_descriptor_set meta_push_descriptors;
......
......@@ -808,7 +808,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
statistics.numAvailableVgprs = statistics.numPhysicalVgprs /
ceil(workgroup_size / statistics.numPhysicalVgprs);
ceil((double)workgroup_size / statistics.numPhysicalVgprs);
statistics.computeWorkGroupSize[0] = local_size[0];
statistics.computeWorkGroupSize[1] = local_size[1];
......
......@@ -119,6 +119,9 @@ def test_unix(args):
for l in diff:
print(l, file=sys.stderr)
if not total:
raise Exception('Could not find any tests.')
print('{}/{}'.format(passed, total), 'tests returned correct results')
return total == passed
......@@ -155,6 +158,9 @@ def _replace_test(args, replace):
for l in diff:
print(l, file=sys.stderr)
if not total:
raise Exception('Could not find any tests.')
print('{}/{}'.format(passed, total), 'tests returned correct results')
return total == passed
......@@ -197,6 +203,9 @@ def test_valgrind(args):
print('FAIL')
print(log, file=sys.stderr)
if not total:
raise Exception('Could not find any tests.')
print('{}/{}'.format(passed, total), 'tests returned correct results')
return total == passed
......
......@@ -2676,6 +2676,7 @@ external_declaration:
| declaration { $$ = $1; }
| pragma_statement { $$ = NULL; }
| layout_defaults { $$ = $1; }
| ';' { $$ = NULL; }
;
function_definition:
......
......@@ -347,6 +347,8 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir)
if (lhs_var != NULL && rhs_var != NULL && lhs_var != rhs_var) {
if (lhs_var->data.mode != ir_var_shader_storage &&
lhs_var->data.mode != ir_var_shader_shared &&
rhs_var->data.mode != ir_var_shader_storage &&
rhs_var->data.mode != ir_var_shader_shared &&
lhs_var->data.precise == rhs_var->data.precise) {
_mesa_hash_table_insert(acp, lhs_var, rhs_var);
}
......
......@@ -544,6 +544,10 @@ ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
return;
if (lhs->var->data.mode == ir_var_shader_storage ||
lhs->var->data.mode == ir_var_shader_shared)
return;
ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
if (!rhs) {
ir_swizzle *swiz = ir->rhs->as_swizzle();
......@@ -560,6 +564,10 @@ ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
orig_swizzle[3] = swiz->mask.w;
}
if (rhs->var->data.mode == ir_var_shader_storage ||
rhs->var->data.mode == ir_var_shader_shared)
return;
/* Move the swizzle channels out to the positions they match in the
* destination. We don't want to have to rewrite the swizzle[]
* array every time we clear a bit of the write_mask.
......
......@@ -323,6 +323,14 @@ write_xfb(struct blob *metadata, struct gl_shader_program *shProg)
blob_write_uint32(metadata, prog->info.stage);
/* Data set by glTransformFeedbackVaryings. */
blob_write_uint32(metadata, shProg->TransformFeedback.BufferMode);
blob_write_bytes(metadata, shProg->TransformFeedback.BufferStride,
sizeof(shProg->TransformFeedback.BufferStride));
blob_write_uint32(metadata, shProg->TransformFeedback.NumVarying);
for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; i++)
blob_write_string(metadata, shProg->TransformFeedback.VaryingNames[i]);
blob_write_uint32(metadata, ltf->NumOutputs);
blob_write_uint32(metadata, ltf->ActiveBuffers);
blob_write_uint32(metadata, ltf->NumVarying);
......@@ -352,6 +360,18 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg)
if (xfb_stage == ~0u)
return;
/* Data set by glTransformFeedbackVaryings. */
shProg->TransformFeedback.BufferMode = blob_read_uint32(metadata);
blob_copy_bytes(metadata, &shProg->TransformFeedback.BufferStride,
sizeof(shProg->TransformFeedback.BufferStride));
shProg->TransformFeedback.NumVarying = blob_read_uint32(metadata);
shProg->TransformFeedback.VaryingNames = (char **)
malloc(shProg->TransformFeedback.NumVarying * sizeof(GLchar *));
/* Note, malloc used with VaryingNames. */
for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; i++)
shProg->TransformFeedback.VaryingNames[i] =
strdup(blob_read_string(metadata));
struct gl_program *prog = shProg->_LinkedShaders[xfb_stage]->Program;
struct gl_transform_feedback_info *ltf =
rzalloc(prog, struct gl_transform_feedback_info);
......