Skip to content
Commits on Source (46)
......@@ -52,6 +52,7 @@ LOCAL_CFLAGS += \
-DHAVE___BUILTIN_EXPECT \
-DHAVE___BUILTIN_FFS \
-DHAVE___BUILTIN_FFSLL \
-DHAVE_DLFCN_H \
-DHAVE_FUNC_ATTRIBUTE_FLATTEN \
-DHAVE_FUNC_ATTRIBUTE_UNUSED \
-DHAVE_FUNC_ATTRIBUTE_FORMAT \
......
......@@ -30,3 +30,18 @@ cac7ab1192eefdd8d8b3f25053fb006b5c330eb8
# stable branch
#
a2f5292c82ad07731d633b36a663e46adc181db9
# This patch required manual backport, which was provided as
# 3953467ee7851792c1d4b1c9435499545a7da9fc
#
4a67ce886a7b3def5f66c1aedf9e5436d157a03c
# This patch required manual backport, which was provided as
# 31677c5aa867e457cd06ae25150be2155e8da3c6
#
1f616a840eac02241c585d28e9dac8f19a297f39
# Jason de-nominated this because it "a) shouldn't be needed and b) is horribly
# broken"
#
11712b9ca17e4e1a819dcb7d020e19c6da77bc90
......@@ -880,6 +880,7 @@ AC_HEADER_MAJOR
AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
AC_CHECK_HEADERS([endian.h])
AC_CHECK_HEADER([dlfcn.h], [DEFINES="$DEFINES -DHAVE_DLFCN_H"])
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
AC_CHECK_FUNC([timespec_get], [DEFINES="$DEFINES -DHAVE_TIMESPEC_GET"])
......
mesa (18.1.5-1) unstable; urgency=medium
* New upstream release.
-- Timo Aaltonen <tjaalton@debian.org> Mon, 30 Jul 2018 14:30:06 +0300
mesa (18.1.4-1) unstable; urgency=medium
[ Emilio Pozuelo Monfort ]
......
......@@ -31,8 +31,8 @@ Compatibility contexts may report a lower version depending on each driver.
<h2>SHA256 checksums</h2>
<pre>
TBD
TBD
SHA256: 8acd42e4ac4d1e96ed22344073b3d4fef03d10f225f4eaf3f88c001dfc10e2db mesa-18.1.4.tar.gz
SHA256: 3061488b5d85504092cf4343816cfb2d96f2ad9bc2edec31fc96933d184cf58b mesa-18.1.4.tar.xz
</pre>
......
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 18.1.5 Release Notes / July 27 2018</h1>
<p>
Mesa 18.1.5 is a bug fix release which fixes bugs found since the 18.1.4 release.
</p>
<p>
Mesa 18.1.5 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation.
Compatibility contexts may report a lower version depending on each driver.
</p>
<h2>SHA256 checksums</h2>
<pre>
TBD
TBD
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103274">Bug 103274</a> - BRW allocates too much heap memory</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107275">Bug 107275</a> - NIR segfaults after spirv-opt</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107295">Bug 107295</a> - Access violation on glDrawArrays with count &gt;= 2048</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107312">Bug 107312</a> - Mesa-git RPM build fails after commit 8cacf38f527d42e41441ef8c25d95d4b2f4e8602</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107366">Bug 107366</a> - NIR verification crashes on piglit tests</li>
</ul>
<h2>Changes</h2>
<p>Alex Smith (1):</p>
<ul>
<li>anv: Pay attention to VK_ACCESS_MEMORY_(READ|WRITE)_BIT</li>
</ul>
<p>Bas Nieuwenhuizen (7):</p>
<ul>
<li>radv: Select correct entries for binning.</li>
<li>radv: Fix number of samples used for binning.</li>
<li>radv: Disable disabled color buffers in rbplus opts.</li>
<li>nir: Do not use continue block after removing it.</li>
<li>util/disk_cache: Fix disk_cache_get_function_timestamp with disabled cache.</li>
<li>nir: Fix end of function without return warning/error.</li>
<li>radv: Still enable inmemory &amp; API level caching if disk cache is not enabled.</li>
</ul>
<p>Chad Versace (2):</p>
<ul>
<li>anv/android: Fix type error in call to vk_errorf()</li>
<li>anv/android: Fix Autotools build for VK_ANDROID_native_buffer</li>
</ul>
<p>Chih-Wei Huang (1):</p>
<ul>
<li>Android: fix a missing nir_intrinsics.h error</li>
</ul>
<p>Danylo Piliaiev (1):</p>
<ul>
<li>i965: Sweep NIR after linking phase to free held memory</li>
</ul>
<p>Dave Airlie (1):</p>
<ul>
<li>r600: enable tess_input_info for TES</li>
</ul>
<p>Dylan Baker (5):</p>
<ul>
<li>docs: Add sha256 sums for 18.1.4 tarballs</li>
<li>cherry-ignore: add 4a67ce886a7b3def5f66c1aedf9e5436d157a03c</li>
<li>cherry-ignore: Add 1f616a840eac02241c585d28e9dac8f19a297f39</li>
<li>cherry-ignore: add 11712b9ca17e4e1a819dcb7d020e19c6da77bc90</li>
<li>bump version to 18.1.5</li>
</ul>
<p>Eric Anholt (2):</p>
<ul>
<li>vc4: Don't automatically reallocate a PERSISTENT-mapped buffer.</li>
<li>meson: Move xvmc test tools from unit tests to installed tools.</li>
</ul>
<p>Harish Krupo (1):</p>
<ul>
<li>egl: Fix missing clamping in eglSetDamageRegionKHR</li>
</ul>
<p>Jan Vesely (3):</p>
<ul>
<li>radeonsi: Refuse to accept code with unhandled relocations</li>
<li>clover: Report error when pipe driver fails to create compute state</li>
<li>clover: Catch errors from executing event action</li>
</ul>
<p>Jason Ekstrand (6):</p>
<ul>
<li>anv: Stop setting 3DSTATE_PS_EXTRA::PixelShaderHasUAV</li>
<li>nir/serialize: Alloc constants off the variable</li>
<li>blorp: Handle the RGB workaround more like other workarounds</li>
<li>intel/blorp: Handle 3-component formats in clears</li>
<li>intel/compiler: Account for built-in uniforms in analyze_ubo_ranges</li>
<li>spirv: Fix a couple of image atomic load/store bugs</li>
</ul>
<p>José Fonseca (1):</p>
<ul>
<li>gallium/tests: Don't ignore S3TC errors.</li>
</ul>
<p>Karol Herbst (1):</p>
<ul>
<li>nir: fix printing of vec16 type</li>
</ul>
<p>Lepton Wu (1):</p>
<ul>
<li>virgl: Fix flush in virgl_encoder_inline_write.</li>
</ul>
<p>Lucas Stach (1):</p>
<ul>
<li>st/mesa: call resource_changed when binding a EGLImage to a texture</li>
</ul>
<p>Mauro Rossi (2):</p>
<ul>
<li>radv: winsys/amdgpu: include missing pthread.h header</li>
<li>android: util/disk_cache: fix building errors in gallium drivers</li>
</ul>
<p>Michel Dänzer (1):</p>
<ul>
<li>gallium: Check pipe_screen::resource_changed before dereferencing it</li>
</ul>
<p>Roland Scheidegger (1):</p>
<ul>
<li>draw: force draw pipeline if there's more than 65535 vertices</li>
</ul>
<p>Samuel Iglesias Gonsálvez (1):</p>
<ul>
<li>anv: fix assert in anv_CmdBindDescriptorSets()</li>
</ul>
<p>Samuel Pitoiset (3):</p>
<ul>
<li>radv: make sure to wait for CP DMA when needed</li>
<li>radv: emit a dummy ZPASS_DONE to prevent GPU hangs on GFX9</li>
<li>radv: fix a memleak for merged shaders on GFX9</li>
</ul>
</div>
</body>
</html>
......@@ -54,7 +54,7 @@ with_osmesa = get_option('osmesa')
with_swr_arches = get_option('swr-arches').split(',')
with_tools = get_option('tools').split(',')
if with_tools.contains('all')
with_tools = ['freedreno', 'glsl', 'intel', 'nir', 'nouveau']
with_tools = ['freedreno', 'glsl', 'intel', 'nir', 'nouveau', 'xvmc']
endif
if get_option('texture-float')
pre_args += '-DTEXTURE_FLOAT_ENABLED'
......@@ -928,7 +928,7 @@ elif cc.has_header_symbol('sys/mkdev.h', 'major')
pre_args += '-DMAJOR_IN_MKDEV'
endif
foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h']
foreach h : ['xlocale.h', 'sys/sysctl.h', 'linux/futex.h', 'endian.h', 'dlfcn.h']
if cc.compiles('#include <@0@>'.format(h), name : '@0@'.format(h))
pre_args += '-DHAVE_@0@'.format(h.to_upper().underscorify())
endif
......
......@@ -284,5 +284,5 @@ option(
'tools',
type : 'string',
value : '',
description : 'Comma delimited list of tools to build. choices : freedreno,glsl,intel,nir,nouveau or all'
description : 'Comma delimited list of tools to build. choices : freedreno,glsl,intel,nir,nouveau,xvmc or all'
)
......@@ -319,11 +319,21 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
}
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
unsigned num_db = cmd_buffer->device->physical_device->rad_info.num_render_backends;
unsigned eop_bug_offset;
void *fence_ptr;
radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
&cmd_buffer->gfx9_fence_offset,
&fence_ptr);
cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;
/* Allocate a buffer for the EOP bug on GFX9. */
radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
&eop_bug_offset, &fence_ptr);
cmd_buffer->gfx9_eop_bug_va =
radv_buffer_get_va(cmd_buffer->upload.upload_bo);
cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
}
cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
......@@ -473,7 +483,7 @@ radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->device->physical_device->rad_info.chip_class,
ptr, va,
radv_cmd_buffer_uses_mec(cmd_buffer),
flags);
flags, cmd_buffer->gfx9_eop_bug_va);
}
if (unlikely(cmd_buffer->device->trace_bo))
......@@ -681,8 +691,11 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
unsigned sx_blend_opt_control = 0;
for (unsigned i = 0; i < subpass->color_count; ++i) {
if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
continue;
}
int idx = subpass->color_attachments[i].attachment;
struct radv_color_buffer_info *cb = &framebuffer->attachments[idx].cb;
......@@ -796,6 +809,10 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
}
}
for (unsigned i = subpass->color_count; i < 8; ++i) {
sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
}
radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
......@@ -2500,6 +2517,11 @@ VkResult radv_EndCommandBuffer(
si_emit_cache_flush(cmd_buffer);
}
/* Make sure CP DMA is idle at the end of IBs because the kernel
* doesn't wait for it.
*/
si_cp_dma_wait_for_idle(cmd_buffer);
vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
......@@ -4054,6 +4076,11 @@ void radv_CmdPipelineBarrier(
0);
}
/* Make sure CP DMA is idle because the driver might have performed a
* DMA operation for copying or filling buffers/images.
*/
si_cp_dma_wait_for_idle(cmd_buffer);
cmd_buffer->state.flush_bits |= dst_flush_bits;
}
......@@ -4070,6 +4097,11 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18);
/* Make sure CP DMA is idle because the driver might have performed a
* DMA operation for copying or filling buffers/images.
*/
si_cp_dma_wait_for_idle(cmd_buffer);
/* TODO: this is overkill. Probably should figure something out from
* the stage mask. */
......@@ -4078,7 +4110,8 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->device->physical_device->rad_info.chip_class,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, va, 2, value);
1, va, 2, value,
cmd_buffer->gfx9_eop_bug_va);
assert(cmd_buffer->cs->cdw <= cdw_max);
}
......
......@@ -2181,7 +2181,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_INV_GLOBAL_L2);
RADV_CMD_FLAG_INV_GLOBAL_L2, 0);
} else if (i == 1) {
si_cs_emit_cache_flush(cs,
queue->device->physical_device->rad_info.chip_class,
......@@ -2191,7 +2191,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_INV_GLOBAL_L2);
RADV_CMD_FLAG_INV_GLOBAL_L2, 0);
}
if (!queue->device->ws->cs_finalize(cs))
......
......@@ -2154,7 +2154,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
free(codes[i]);
if (modules[i]) {
if (nir[i]) {
if (!pipeline->device->keep_shader_info)
ralloc_free(nir[i]);
......@@ -2437,7 +2437,7 @@ radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCr
pipeline->device->physical_device->rad_info.max_se);
unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_mode_cntl_1);
unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
unsigned effective_samples = total_samples;
unsigned color_bytes_per_pixel = 0;
......@@ -2462,7 +2462,7 @@ radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCr
}
const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
while(color_entry->bpp <= color_bytes_per_pixel)
while(color_entry[1].bpp <= color_bytes_per_pixel)
++color_entry;
extent = color_entry->extent;
......@@ -2476,7 +2476,7 @@ radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCr
unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
while(ds_entry->bpp <= ds_bytes_per_pixel)
while(ds_entry[1].bpp <= ds_bytes_per_pixel)
++ds_entry;
extent.width = MIN2(extent.width, ds_entry->extent.width);
......
......@@ -248,7 +248,6 @@ radv_is_cache_disabled(struct radv_device *device)
* MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
*/
return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
!device->physical_device->disk_cache ||
device->keep_shader_info;
}
......@@ -271,7 +270,7 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
/* Don't cache when we want debug info, since this isn't
* present in the cache.
*/
if (radv_is_cache_disabled(device)) {
if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
pthread_mutex_unlock(&cache->mutex);
return false;
}
......
......@@ -972,6 +972,9 @@ struct radv_cmd_state {
uint32_t last_num_instances;
uint32_t last_first_instance;
uint32_t last_vertex_offset;
/* Whether CP DMA is busy/idle. */
bool dma_is_busy;
};
struct radv_cmd_pool {
......@@ -1034,6 +1037,7 @@ struct radv_cmd_buffer {
uint32_t gfx9_fence_offset;
struct radeon_winsys_bo *gfx9_fence_bo;
uint32_t gfx9_fence_idx;
uint64_t gfx9_eop_bug_va;
/**
* Whether a query pool has been reset and we have to flush caches.
......@@ -1066,7 +1070,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
unsigned data_sel,
uint64_t va,
uint32_t old_fence,
uint32_t new_fence);
uint32_t new_fence,
uint64_t gfx9_eop_bug_va);
void si_emit_wait_fence(struct radeon_winsys_cs *cs,
bool predicated,
......@@ -1076,7 +1081,8 @@ void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
uint32_t *fence_ptr, uint64_t va,
bool is_mec,
enum radv_cmd_flush_bits flush_bits);
enum radv_cmd_flush_bits flush_bits,
uint64_t gfx9_eop_bug_va);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va);
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
......@@ -1086,6 +1092,8 @@ void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
unsigned size);
void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
uint64_t size, unsigned value);
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
bool
radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
......
......@@ -1169,7 +1169,8 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->device->physical_device->rad_info.chip_class,
radv_cmd_buffer_uses_mec(cmd_buffer),
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
1, avail_va, 0, 1,
cmd_buffer->gfx9_eop_bug_va);
break;
default:
unreachable("ending unhandled query type");
......@@ -1292,13 +1293,15 @@ void radv_CmdWriteTimestamp(
cmd_buffer->device->physical_device->rad_info.chip_class,
mec,
V_028A90_BOTTOM_OF_PIPE_TS, 0,
3, query_va, 0, 0);
3, query_va, 0, 0,
cmd_buffer->gfx9_eop_bug_va);
si_cs_emit_write_event_eop(cs,
false,
cmd_buffer->device->physical_device->rad_info.chip_class,
mec,
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
1, avail_va, 0, 1,
cmd_buffer->gfx9_eop_bug_va);
break;
}
query_va += pool->stride;
......
......@@ -852,7 +852,8 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
unsigned data_sel,
uint64_t va,
uint32_t old_fence,
uint32_t new_fence)
uint32_t new_fence,
uint64_t gfx9_eop_bug_va)
{
unsigned op = EVENT_TYPE(event) |
EVENT_INDEX(5) |
......@@ -860,6 +861,17 @@ void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
if (chip_class >= GFX9 || is_gfx8_mec) {
/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
* counters) must immediately precede every timestamp event to
* prevent a GPU hang on GFX9.
*/
if (chip_class == GFX9) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
radeon_emit(cs, gfx9_eop_bug_va);
radeon_emit(cs, gfx9_eop_bug_va >> 32);
}
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, predicated));
radeon_emit(cs, op);
radeon_emit(cs, EOP_DATA_SEL(data_sel));
......@@ -941,7 +953,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
uint32_t *flush_cnt,
uint64_t flush_va,
bool is_mec,
enum radv_cmd_flush_bits flush_bits)
enum radv_cmd_flush_bits flush_bits,
uint64_t gfx9_eop_bug_va)
{
unsigned cp_coher_cntl = 0;
uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
......@@ -971,7 +984,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
chip_class,
is_mec,
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
0, 0, 0, 0, 0);
0, 0, 0, 0, 0,
gfx9_eop_bug_va);
}
}
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
......@@ -1057,7 +1071,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
uint32_t old_fence = (*flush_cnt)++;
si_cs_emit_write_event_eop(cs, false, chip_class, false, cb_db_event, tc_flags, 1,
flush_va, old_fence, *flush_cnt);
flush_va, old_fence, *flush_cnt,
gfx9_eop_bug_va);
si_emit_wait_fence(cs, false, flush_va, *flush_cnt, 0xffffffff);
}
......@@ -1149,7 +1164,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
cmd_buffer->device->physical_device->rad_info.chip_class,
ptr, va,
radv_cmd_buffer_uses_mec(cmd_buffer),
cmd_buffer->state.flush_bits);
cmd_buffer->state.flush_bits,
cmd_buffer->gfx9_eop_bug_va);
if (unlikely(cmd_buffer->device->trace_bo))
......@@ -1214,7 +1230,6 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint32_t header = 0, command = 0;
assert(size);
assert(size <= cp_dma_max_byte_count(cmd_buffer));
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
......@@ -1273,11 +1288,16 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
* indices. If we wanted to execute CP DMA in PFP, this packet
* should precede it.
*/
if ((flags & CP_DMA_SYNC) && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
if (flags & CP_DMA_SYNC) {
if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
radeon_emit(cs, 0);
}
/* CP will see the sync flag and wait for all DMAs to complete. */
cmd_buffer->state.dma_is_busy = false;
}
if (unlikely(cmd_buffer->device->trace_bo))
radv_cmd_buffer_trace_emit(cmd_buffer);
}
......@@ -1339,6 +1359,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
uint64_t main_src_va, main_dest_va;
uint64_t skipped_size = 0, realign_size = 0;
/* Assume that we are not going to sync after the last DMA operation. */
cmd_buffer->state.dma_is_busy = true;
if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
......@@ -1402,6 +1424,9 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
assert(va % 4 == 0 && size % 4 == 0);
/* Assume that we are not going to sync after the last DMA operation. */
cmd_buffer->state.dma_is_busy = true;
while (size) {
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
unsigned dma_flags = CP_DMA_CLEAR;
......@@ -1417,6 +1442,25 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
}
}
/**
 * Make the CP wait for any CP DMA work that this command buffer has marked
 * as still in flight.
 *
 * Returns without emitting anything when the chip class is below CIK or when
 * the command buffer state does not flag the DMA engine as busy. Otherwise a
 * synchronizing DMA packet is emitted and the busy flag is cleared.
 */
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
{
	/* Nothing to wait for: unsupported chip class or no DMA outstanding. */
	if (cmd_buffer->device->physical_device->rad_info.chip_class < CIK ||
	    !cmd_buffer->state.dma_is_busy)
		return;

	/* Emit a zero-byte copy carrying the sync flag.
	 *
	 * There is no data for the DMA engine to move, so the request itself
	 * is skipped, but the CP still honours the sync flag and stalls until
	 * every earlier DMA has completed.
	 */
	si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);

	cmd_buffer->state.dma_is_busy = false;
}
/* For MSAA sample positions. */
#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
(((s0x) & 0xf) | (((unsigned)(s0y) & 0xf) << 4) | \
......
......@@ -33,6 +33,7 @@
#include "addrlib/addrinterface.h"
#include <amdgpu.h>
#include "util/list.h"
#include <pthread.h>
struct radv_amdgpu_winsys {
struct radeon_winsys base;
......
......@@ -46,6 +46,7 @@ LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
# Modules using libmesa_nir must set LOCAL_GENERATED_SOURCES to this
MESA_GEN_NIR_H := $(addprefix $(call local-generated-sources-dir)/, \
nir/nir_opcodes.h \
nir/nir_intrinsics.h \
nir/nir_builder_opcodes.h)
nir_builder_opcodes_gen := $(LOCAL_PATH)/nir/nir_builder_opcodes_h.py
......
......@@ -24,6 +24,28 @@
#include "nir.h"
#include "nir_control_flow.h"
/**
 * Returns the predecessor of the loop's first block that is not the block
 * immediately preceding the loop, i.e. the block that jumps back to the loop
 * header.
 *
 * The loop header is asserted to have exactly two predecessors, so there is
 * exactly one such block.
 */
static nir_block *
find_continue_block(nir_loop *loop)
{
   nir_block *loop_head = nir_loop_first_block(loop);
   nir_block *preheader =
      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));

   assert(loop_head->predecessors->entries == 2);

   struct set_entry *it;
   set_foreach(loop_head->predecessors, it) {
      /* Skip the edge that enters the loop from above. */
      if (it->key == preheader)
         continue;

      return (nir_block *)it->key;
   }

   unreachable("Continue block not found!");
}
/**
* This optimization detects if statements at the tops of loops where the
* condition is a phi node of two constants and moves half of the if to above
......@@ -95,12 +117,7 @@ opt_peel_loop_initial_if(nir_loop *loop)
if (header_block->predecessors->entries != 2)
return false;
nir_block *continue_block = NULL;
struct set_entry *pred_entry;
set_foreach(header_block->predecessors, pred_entry) {
if (pred_entry->key != prev_block)
continue_block = (void *)pred_entry->key;
}
nir_block *continue_block = find_continue_block(loop);
nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
if (!if_node || if_node->type != nir_cf_node_if)
......@@ -191,6 +208,10 @@ opt_peel_loop_initial_if(nir_loop *loop)
nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
nir_cf_reinsert(&header, nir_after_block_before_jump(continue_block));
/* Get continue block again as the previous reinsert might have removed the block. */
continue_block = find_continue_block(loop);
nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
nir_after_cf_list(continue_list));
nir_cf_reinsert(&tmp, nir_after_block_before_jump(continue_block));
......
......@@ -87,6 +87,7 @@ print_register(nir_register *reg, print_state *state)
static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4",
"error", "error", "error", "vec8",
"error", "error", "error", "error",
"error", "error", "error", "vec16"};
static void
......